@misc{nih_nih_2023, title = {{NIH} {Genomic} {Data} {Sharing} {Policy}}, url = {https://sharing.nih.gov/genomic-data-sharing-policy}, abstract = {The National Institutes of Health (NIH) announces the final Genomic Data Sharing (GDS) Policy that promotes sharing, for research purposes, of large-scale human and non-human genomic1 data generated from NIH-funded research. A summary of public comments on the draft GDS Policy and NIH’s responses are also provided.}, urldate = {2021-10-18}, author = {NIH}, year = {2023}, } @article{kelleher_htsget_2019, title = {htsget: a protocol for securely streaming genomic data}, volume = {35}, issn = {1367-4803}, shorttitle = {htsget}, url = {https://doi.org/10.1093/bioinformatics/bty492}, doi = {10.1093/bioinformatics/bty492}, abstract = {Standardized interfaces for efficiently accessing high-throughput sequencing data are a fundamental requirement for large-scale genomic data sharing. We have developed htsget, a protocol for secure, efficient and reliable access to sequencing read and variation data. 
We demonstrate four independent client and server implementations, and the results of a comprehensive interoperability demonstration.http://samtools.github.io/hts-specs/htsget.htmlSupplementary data are available at Bioinformatics online.}, number = {1}, urldate = {2023-09-05}, journal = {Bioinformatics}, author = {Kelleher, Jerome and Lin, Mike and Albach, C H and Birney, Ewan and Davies, Robert and Gourtovaia, Marina and Glazer, David and Gonzalez, Cristina Y and Jackson, David K and Kemp, Aaron and Marshall, John and Nowak, Andrew and Senf, Alexander and Tovar-Corona, Jaime M and Vikhorev, Alexander and Keane, Thomas M and {GA4GH Streaming Task Team}}, month = jan, year = {2019}, pages = {119--121}, } @article{haring_empowering_2021, title = {Empowering {Equitable} {Data} {Use} {Partnerships} and {Indigenous} {Data} {Sovereignties} {Amid} {Pandemic} {Genomics}}, volume = {9}, issn = {2296-2565}, url = {https://www.frontiersin.org/articles/10.3389/fpubh.2021.742467}, abstract = {The COVID-19 pandemic has inequitably impacted Indigenous communities in the United States. In this emergency state that highlighted existing inadequacies in US government and tribal public health infrastructures, many tribal nations contracted with commercial entities and other organization types to conduct rapid diagnostic and antibody testing, often based on proprietary technologies specific to the novel pathogen. They also partnered with public-private enterprises on clinical trials to further the development of vaccines. Indigenous people contributed biological samples for assessment and, in many cases, broadly consented for indefinite use for future genomics research. A concern is that the need for crisis aid may have placed Indigenous communities in a position to forego critical review of data use agreements by tribal research governances. 
In effect, tribal nations were placed in the unenviable position of trading short-term public health assistance for long-term, unrestricted access to Indigenous genomes that may disempower future tribal sovereignties over community members' data. Diagnostic testing, specimen collection, and vaccine research is ongoing; thus, our aim is to outline pathways to trust that center current and future equitable relationship-building between tribal entities and public-private interests. These pathways can be utilized to increase Indigenous communities' trust of external partners and share understanding of expectations for and execution of data protections. We discuss how to navigate genomic-based data use agreements in the context of pathogen genomics. While we focus on US tribal nations, Indigenous genomic data sovereignties relate to global Indigenous nations regardless of colonial government recognition.}, urldate = {2023-08-28}, journal = {Frontiers in Public Health}, author = {Haring, Rodney C. and Blanchard, Jessica W. and Korchmaros, Josephine D. and Lund, Justin R. and Haozous, Emily A. and Raphaelito, Josie and Hudson, Maui and Tsosie, Krystal S.}, year = {2021}, } @misc{noauthor_htsget_nodate, title = {htsget: a protocol for securely streaming genomic data. - {Abstract} - {Europe} {PMC}}, url = {https://europepmc.org/article/MED/29931085}, urldate = {2023-08-24}, } @misc{australian_biocommons_protection_2022, title = {Protection of genomic data and the {Australian} {Privacy} {Act}: is genomic data ‘personal information’?}, shorttitle = {Protection of genomic data and the {Australian} {Privacy} {Act}}, url = {https://www.youtube.com/watch?v=Iaei-9Gu-AI}, abstract = {It is easy to assume that genomic data will be captured by legal definitions of ‘health information’ and ‘genetic information’, but the legal meaning of ‘genetic information’ need not align with scientific categories. 
There are many different types of genomic data, with varied characteristics, uses and applications. Clarifying when genomic data is covered by the Privacy Act 1988 (Cth) is an ongoing evaluative exercise but is important for at least 3 reasons: 1. those subject to the Privacy Act need to be able to confidently navigate their responsibilities 2. understanding current controls is a prerequisite for meaningful external critique (and this is particularly important at a time when the Privacy Act is under review), and 3. while legislation that applies to state public sector agencies is generally distinct from the Privacy Act there are similarities that extend the relevance of the question when is genomic data ‘personal information’ under the Privacy Act? In this presentation, Mark will explore the relationship between the legal concept of genetic information and the concept of genomic data relevant to health and medical research, reflect on the characteristics of each, and the possibility of more clearly identifying the legal rights and responsibilities which attach to the use and disclosure of genomic data in the future. Speaker: Mark Taylor, Professor in Health Law and Regulation, Melbourne Law School; Director, Health, Law and Emerging Technologies (HeLEX), University of Melbourne. This webinar was recorded on 16 February 2022. The slides can be downloaded as a PDF from Zenodo: https://zenodo.org/record/6423621\#.Yk... See upcoming webinars and workshops on our website: https://www.biocommons.org.au/webinar... 
Captions are auto-generated.}, urldate = {2023-08-24}, author = {{Australian BioCommons}}, month = apr, year = {2022}, } @misc{australian_biocommons_genomic_2023, title = {Genomic data - improving discovery and access management}, url = {https://www.youtube.com/watch?v=9SD6gpjDGWE}, abstract = {Australian human genome initiatives are generating vast amounts of human genome data to understand the cause of complex diseases, improve diagnosis / early disease detection and identify tailored treatment options. To achieve this, genomic data needs to be compared between multiple individuals and cohorts, often across efforts/jurisdictions, at national or global scales, and requires the genomic data to be findable, searchable, shareable, and linkable to analytical capabilities. The Human Genome Platform Project aims to make it as easy as possible to securely and responsibly share human genome research data nationally and internationally. The project is building a ‘services toolbox’ that combines best practice technologies in human genome data sharing. In this webinar the project team will discuss three important aspects of human genomic data sharing: i) discovery of genomic cohorts and the GA4GH Beacon protocol that enables this functionality across multiple sites ii) streamlining of data access request management; the Garvan will share experience using the Resource Entitlement Management System (REMS) software package. 
iii) community management functionality of CILogon and how that has enabled Identity Management in HGPP ---------- Speakers Jess Holliday, Program Manager - Human Genome Informatics, Australian BioCommons Dr Andrew Patterson, Human Genomics Data Technology Lead, University of Melbourne Mustafa Syed, Bioinformatics System Integration Manager, Children’s Cancer Institute Associate Professor Sarah Kummerfeld, Director - Data Science, Garvan Institute of Medical Research ---------- Who the webinar is for: This webinar is for people who are interested in implementation of a similar system for secure and responsible data sharing whether that be for genomics or other applications. ---------- This webinar was recorded on 14 June 2023. See upcoming webinars and workshops on our website: https://www.biocommons.org.au/webinar... Captions are auto-generated by Otter.ai.}, urldate = {2023-08-24}, author = {{Australian BioCommons}}, month = jun, year = {2023}, } @misc{shadbolt_enhancing_2022, title = {Enhancing {Australia}'s capability for secure and responsible sharing of human genomics research data}, url = {https://zenodo.org/record/7242979}, abstract = {The Human Genomes Platforms Project (HGPP) aims to leverage best practice technologies and global standards to accelerate Findable, Accessible, Interoperable, Reuseable (FAIR) human genomics data sharing in Australia. Involving leading Australian human genomics research organisations, along with national computing infrastructure partners, the HGPP is breaking down silos and facilitating the deployment of much needed genomic data sharing infrastructure in Australia.  The main project themes are: virtual cohort assembly; data access committee (DAC) automation; federated identity and access management; data and metadata archiving; and documentation, communications and training. We are collaborating with partners internationally to bring best practice technologies to Australia.  
This includes:  Investigating emerging Global Alliance for Genomics and Health (GA4GH) standards including Beacon v2 for virtual cohort assembly and Passports to enable access to data Assessing software including Resource Entitlement Management System (REMS) and Data Use Oversight System (DUOS) to facilitate the Data Access Request and Approval process undertaken by DACs Establishing a national life science authentication service using CILogon Investigating the feasibility of a Federated European Genome-Phenome Archive (FEGA) node in Australia  Ensuring all aspects of the project are well-documented as well as hosting webinars and training sessions to ensure outputs are easily adopted by the community Here we introduce the HGPP, an Australian Research Data Commons (ARDC) and Bioplatforms Australia funded project with in-kind contributions from partner organisations. We describe our achievements to date, outline our upcoming plans for the project and how they will improve the landscape of human genomics data sharing for Australian researchers.}, urldate = {2023-08-24}, author = {Shadbolt, Marion and Boughtwood, Tiffany and Christiansen, Jeff and Copty, Joe and Cowley, Mark and Davies, Kylie and Downton, Matthew and Druken, Kelsey and Evans, Ben and Gaff, Clara and Gilbert, Andrew and Hall, Christina and Hobbs, Matthew and Hofmann, Oliver and Holliday, Jessica and Kaplan, Warren and Koufariotis, Ross and Kummerfeld, Sarah and Leonard, Conrad and Lin, Angela and Lonie, Andrew and Marks, Heath and McCafferty, Siobhann and Munro, David and Patterson, Andrew and Pearson, John and Pope, Bernard and Ravishankar, Shyamsundar and Reisinger, Florian and Robinson, Andrew and San Kho Lin, Victor and Scullen, John and Syed, Mustafa and Taouk, Kamile and Treloar, Andrew and Wang, Jingbo and Wong-Erasmus, Marie and Wood, Scott}, month = oct, year = {2022}, doi = {10.5281/zenodo.7242979}, keywords = {data sharing, Australia, genomics, human, archiving, authorisation, 
authentication, DAC, CILogon, REMS, FEGA}, } @misc{taouk_establishing_2022, title = {Establishing a national {Beacon} version 2 network for real-time genomics data discovery}, url = {https://zenodo.org/record/7402705}, abstract = {Aggregating data within the realm of rare diseases and cancers is paramount to gaining statistically significant insights into these diseases. To achieve this, the Australian BioCommons has launched the Human Genome Platforms Project (HGPP) to align and build on global standards such as the Global Alliance for Global Health's Beacon specification. This involves the collaboration of numerous institutes across several sub-projects focused on Data Access Control (DAC) automation, federated identity management and data archiving. Here, we outline our methods and associated challenges with implemented the Beacon version 2 protocol, as well as the Beacon Network and interface.}, urldate = {2023-08-24}, author = {Taouk, Kamile and Lin, Angela and Wong-Erasmus, Marie and Cowley, Mark and Boughtwood, Tiffany and Christiansen, Jeff and Copty, Joe and Ravishankar, Shyamsundar and Davies, Kylie and Downton, Matthew and Druken, Kelsey and Evans, Ben and Gaff, Clara and Gilbert, Andrew and Hall, Christina and Hobbs, Matthew and Hofmann, Oliver and Holliday, Jessica and Kaplan, Warren and Koufariotis, Ross and Kummerfeld, Sarah and Leonard, Conrad and Lonie, Andrew and Marks, Heath and McCafferty, Siobhann and Monro, David and Patterson, Andrew and Pearson, John and Pope, Bernard and Reisinger, Florian and Robinson, Andrew and San Kho Lin, Victor and Scullen, John and Shadbolt, Marion and Treloar, Andrew and Wang, Jingbo and Wood, Scott and Syed, Mustafa}, month = dec, year = {2022}, doi = {10.5281/zenodo.7402705}, keywords = {Beacon, Big data, Data commons, Data sharing, Network}, } @misc{so_deploying_2022, title = {Deploying a {User} {Interface} for {Sharing} {Federated} {Genomic} and {Phenotypic} {Data} using the {Beacon} v2 {Protocol}}, url = 
{https://zenodo.org/record/7416545}, abstract = {Given the current state of the data sharing landscape, there exists a need for a user-friendly interface through which queries can be constructed and set via the Beacon Network protocol. Here, we present an intuitive, minimal and streamlined user interface for the Beacon Network, as well as outline some challenges faced in the process.}, urldate = {2023-08-24}, author = {So, Dionne and Nguyen, Ricky and Do, Jessica and Kamarinos, Zoe and Lin, Angela and Cowley, Mark and Syed, Mustafa and Taouk, Kamile and Wong-Erasmus, Marie}, month = dec, year = {2022}, doi = {10.5281/zenodo.7416545}, } @misc{so_developing_2022, title = {Developing a {User} {Interface} for {Sharing} {Federated} {Genomic} and {Phenotypic} {Data} {Using} the {Beacon} v2 protocol}, url = {https://zenodo.org/record/7416582}, abstract = {Given the current state of the data sharing landscape, there exists a need for a user-friendly interface through which queries can be constructed and set via the Beacon Network protocol. Here, we present an intuitive, minimal and streamlined user interface for the Beacon Network, as well as outline some challenges faced in the process}, urldate = {2023-08-24}, author = {So, Dionne and Nguyen, Ricky and Do, Jessica and Kamarinos, Zoe}, month = dec, year = {2022}, doi = {10.5281/zenodo.7416582}, } @misc{shadbolt_advancing_2023, title = {Advancing {Human} {Genomics} {Data} {Sharing} {In} {Australia}: {Highlights} {From} {The} {Australian} {BioCommons}}, shorttitle = {Advancing {Human} {Genomics} {Data} {Sharing} {In} {Australia}}, url = {https://zenodo.org/record/8137358}, abstract = {Currently, the management and sharing of human genomics data in Australia is siloed within  national institutes. The Australian BioCommons aims to remove barriers to access through collaborative projects with research institutes, infrastructure partners, and government agencies. 
Here we describe the major projects that we are undertaking to advance Findable, Accessible, Interoperable, Reuseable (FAIR) human genomics data sharing in Australia. The Australian BioCommons provides strategic leadership, project coordination, technical expertise and domain knowledge to these national, multi-institutional and collaborative projects. The Human Genomes Platform Project (HGPP) aims to leverage international best practice technologies and global standards to accelerate FAIR human genomics data sharing in Australia. Involving leading Australian human genomics research organisations, along with national computing infrastructure partners, the HGPP is facilitating the deployment of much needed genomic data sharing infrastructure in Australia. Specifically, the project is investigating which existing international solutions meet Australian requirements with sub-projects aligning to GA4GH and other internationally developed software and standards (Beacon, CILogon, Resource Entitlement Management Software, Federated EGA).  The Australian Cardiovascular disease Data Commons (ACDC) will be a comprehensive, secure, scalable, internationally integrated data infrastructure connected to global best practice analysis platforms, to enable the identification of novel insights and predictive biomarkers for Coronary Artery Disease (CAD). Working with members of the CAD Frontiers consortium, the Australian BioCommons has established a pilot data portal, designed a data dictionary, and developed tools and pipelines to facilitate its use. 
We plan to onboard 18 cohorts with detailed clinical and molecular profiling data, representing 395,000 Australians.}, urldate = {2023-08-24}, author = {Shadbolt, Marion and Holliday, Jessica and Winter, Uwe and Manos, Steven and Christiansen, Jeff and Lonie, Andrew and Pope, Bernard}, month = jul, year = {2023}, doi = {10.5281/zenodo.8137358}, keywords = {data sharing, Australia, genomics, human, archiving, authorisation, authentication, DAC, CILogon, REMS, FEGA, cardiovascular disease, data commons, Gen3}, } @article{aaltonen_pan-cancer_2020, title = {Pan-cancer analysis of whole genomes}, volume = {578}, copyright = {2020 The Author(s)}, issn = {1476-4687}, url = {https://www.nature.com/articles/s41586-020-1969-6}, doi = {10.1038/s41586-020-1969-6}, abstract = {Cancer is driven by genetic change, and the advent of massively parallel sequencing has enabled systematic documentation of this variation at the whole-genome scale1–3. Here we report the integrative analysis of 2,658 whole-cancer genomes and their matching normal tissues across 38 tumour types from the Pan-Cancer Analysis of Whole Genomes (PCAWG) Consortium of the International Cancer Genome Consortium (ICGC) and The Cancer Genome Atlas (TCGA). We describe the generation of the PCAWG resource, facilitated by international data sharing using compute clouds. On average, cancer genomes contained 4–5 driver mutations when combining coding and non-coding genomic elements; however, in around 5\% of cases no drivers were identified, suggesting that cancer driver discovery is not yet complete. Chromothripsis, in which many clustered structural variants arise in a single catastrophic event, is frequently an early event in tumour evolution; in acral melanoma, for example, these events precede most somatic point mutations and affect several cancer-associated genes simultaneously. 
Cancers with abnormal telomere maintenance often originate from tissues with low replicative activity and show several mechanisms of preventing telomere attrition to critical levels. Common and rare germline variants affect patterns of somatic mutation, including point mutations, structural variants and somatic retrotransposition. A collection of papers from the PCAWG Consortium describes non-coding mutations that drive cancer beyond those in the TERT promoter4; identifies new signatures of mutational processes that cause base substitutions, small insertions and deletions and structural variation5,6; analyses timings and patterns of tumour evolution7; describes the diverse transcriptional consequences of somatic mutation on splicing, expression levels, fusion genes and promoter activity8,9; and evaluates a range of more-specialized features of cancer genomes8,10–18.}, language = {en}, number = {7793}, urldate = {2023-08-24}, journal = {Nature}, author = {Aaltonen, Lauri A. and Abascal, Federico and Abeshouse, Adam and Aburatani, Hiroyuki and Adams, David J. and Agrawal, Nishant and Ahn, Keun Soo and Ahn, Sung-Min and Aikata, Hiroshi and Akbani, Rehan and Akdemir, Kadir C. and Al-Ahmadie, Hikmat and Al-Sedairy, Sultan T. and Al-Shahrour, Fatima and Alawi, Malik and Albert, Monique and Aldape, Kenneth and Alexandrov, Ludmil B. and Ally, Adrian and Alsop, Kathryn and Alvarez, Eva G. and Amary, Fernanda and Amin, Samirkumar B. and Aminou, Brice and Ammerpohl, Ole and Anderson, Matthew J. and Ang, Yeng and Antonello, Davide and Anur, Pavana and Aparicio, Samuel and Appelbaum, Elizabeth L. and Arai, Yasuhito and Aretz, Axel and Arihiro, Koji and Ariizumi, Shun-ichi and Armenia, Joshua and Arnould, Laurent and Asa, Sylvia and Assenov, Yassen and Atwal, Gurnit and Aukema, Sietse and Auman, J. Todd and Aure, Miriam R. R. and Awadalla, Philip and Aymerich, Marta and Bader, Gary D. and Baez-Ortega, Adrian and Bailey, Matthew H. and Bailey, Peter J. 
and Balasundaram, Miruna and Balu, Saianand and Bandopadhayay, Pratiti and Banks, Rosamonde E. and Barbi, Stefano and Barbour, Andrew P. and Barenboim, Jonathan and Barnholtz-Sloan, Jill and Barr, Hugh and Barrera, Elisabet and Bartlett, John and Bartolome, Javier and Bassi, Claudio and Bathe, Oliver F. and Baumhoer, Daniel and Bavi, Prashant and Baylin, Stephen B. and Bazant, Wojciech and Beardsmore, Duncan and Beck, Timothy A. and Behjati, Sam and Behren, Andreas and Niu, Beifang and Bell, Cindy and Beltran, Sergi and Benz, Christopher and Berchuck, Andrew and Bergmann, Anke K. and Bergstrom, Erik N. and Berman, Benjamin P. and Berney, Daniel M. and Bernhart, Stephan H. and Beroukhim, Rameen and Berrios, Mario and Bersani, Samantha and Bertl, Johanna and Betancourt, Miguel and Bhandari, Vinayak and Bhosle, Shriram G. and Biankin, Andrew V. and Bieg, Matthias and Bigner, Darell and Binder, Hans and Birney, Ewan and Birrer, Michael and Biswas, Nidhan K. and Bjerkehagen, Bodil and Bodenheimer, Tom and Boice, Lori and Bonizzato, Giada and De Bono, Johann S. and Boot, Arnoud and Bootwalla, Moiz S. and Borg, Ake and Borkhardt, Arndt and Boroevich, Keith A. and Borozan, Ivan and Borst, Christoph and Bosenberg, Marcus and Bosio, Mattia and Boultwood, Jacqueline and Bourque, Guillaume and Boutros, Paul C. and Bova, G. Steven and Bowen, David T. and Bowlby, Reanne and Bowtell, David D. L. and Boyault, Sandrine and Boyce, Rich and Boyd, Jeffrey and Brazma, Alvis and Brennan, Paul and Brewer, Daniel S. and Brinkman, Arie B. and Bristow, Robert G. and Broaddus, Russell R. and Brock, Jane E. and Brock, Malcolm and Broeks, Annegien and Brooks, Angela N. and Brooks, Denise and Brors, Benedikt and Brunak, Søren and Bruxner, Timothy J. C. and Bruzos, Alicia L. and Buchanan, Alex and Buchhalter, Ivo and Buchholz, Christiane and Bullman, Susan and Burke, Hazel and Burkhardt, Birgit and Burns, Kathleen H. and Busanovich, John and Bustamante, Carlos D. and Butler, Adam P. 
and Butte, Atul J. and Byrne, Niall J. and Børresen-Dale, Anne-Lise and Caesar-Johnson, Samantha J. and Cafferkey, Andy and Cahill, Declan and Calabrese, Claudia and Caldas, Carlos and Calvo, Fabien and Camacho, Niedzica and Campbell, Peter J. and Campo, Elias and Cantù, Cinzia and Cao, Shaolong and Carey, Thomas E. and Carlevaro-Fita, Joana and Carlsen, Rebecca and Cataldo, Ivana and Cazzola, Mario and Cebon, Jonathan and Cerfolio, Robert and Chadwick, Dianne E. and Chakravarty, Dimple and Chalmers, Don and Chan, Calvin Wing Yiu and Chan, Kin and Chan-Seng-Yue, Michelle and Chandan, Vishal S. and Chang, David K. and Chanock, Stephen J. and Chantrill, Lorraine A. and Chateigner, Aurélien and Chatterjee, Nilanjan and Chayama, Kazuaki and Chen, Hsiao-Wei and Chen, Jieming and Chen, Ken and Chen, Yiwen and Chen, Zhaohong and Cherniack, Andrew D. and Chien, Jeremy and Chiew, Yoke-Eng and Chin, Suet-Feung and Cho, Juok and Cho, Sunghoon and Choi, Jung Kyoon and Choi, Wan and Chomienne, Christine and Chong, Zechen and Choo, Su Pin and Chou, Angela and Christ, Angelika N. and Christie, Elizabeth L. and Chuah, Eric and Cibulskis, Carrie and Cibulskis, Kristian and Cingarlini, Sara and Clapham, Peter and Claviez, Alexander and Cleary, Sean and Cloonan, Nicole and Cmero, Marek and Collins, Colin C. and Connor, Ashton A. and Cooke, Susanna L. and Cooper, Colin S. and Cope, Leslie and Corbo, Vincenzo and Cordes, Matthew G. and Cordner, Stephen M. and Cortés-Ciriano, Isidro and Covington, Kyle and Cowin, Prue A. and Craft, Brian and Craft, David and Creighton, Chad J. and Cun, Yupeng and Curley, Erin and Cutcutache, Ioana and Czajka, Karolina and Czerniak, Bogdan and Dagg, Rebecca A. and Danilova, Ludmila and Davi, Maria Vittoria and Davidson, Natalie R. and Davies, Helen and Davis, Ian J. and Davis-Dusenbery, Brandi N. and Dawson, Kevin J. and De La Vega, Francisco M. and De Paoli-Iseppi, Ricardo and Defreitas, Timothy and Tos, Angelo P. 
Dei and Delaneau, Olivier and Demchok, John A. and Demeulemeester, Jonas and Demidov, German M. and Demircioğlu, Deniz and Dennis, Nening M. and Denroche, Robert E. and Dentro, Stefan C. and Desai, Nikita and Deshpande, Vikram and Deshwar, Amit G. and Desmedt, Christine and Deu-Pons, Jordi and Dhalla, Noreen and Dhani, Neesha C. and Dhingra, Priyanka and Dhir, Rajiv and DiBiase, Anthony and Diamanti, Klev and Ding, Li and Ding, Shuai and Dinh, Huy Q. and Dirix, Luc and Doddapaneni, HarshaVardhan and Donmez, Nilgun and Dow, Michelle T. and Drapkin, Ronny and Drechsel, Oliver and Drews, Ruben M. and Serge, Serge and Dudderidge, Tim and Dueso-Barroso, Ana and Dunford, Andrew J. and Dunn, Michael and Dursi, Lewis Jonathan and Duthie, Fraser R. and Dutton-Regester, Ken and Eagles, Jenna and Easton, Douglas F. and Edmonds, Stuart and Edwards, Paul A. and Edwards, Sandra E. and Eeles, Rosalind A. and Ehinger, Anna and Eils, Juergen and Eils, Roland and El-Naggar, Adel and Eldridge, Matthew and Ellrott, Kyle and Erkek, Serap and Escaramis, Georgia and Espiritu, Shadrielle M. G. and Estivill, Xavier and Etemadmoghadam, Dariush and Eyfjord, Jorunn E. and Faltas, Bishoy M. and Fan, Daiming and Fan, Yu and Faquin, William C. 
and Farcas, Claudiu and Fassan, Matteo and Fatima, Aquila and {The ICGC/TCGA Pan-Cancer Analysis of Whole Genomes Consortium}}, month = feb, year = {2020}, note = {Number: 7793 Publisher: Nature Publishing Group}, keywords = {Cancer genomics}, pages = {82--93}, } @misc{european_council_chapter_2016, title = {Chapter 4 – {Controller} and processor {Regulation} ({EU}) 2016/679 ({GDPR})}, url = {https://gdpr-info.eu/chapter-4/}, abstract = {Section 1General obligations Article 24Responsibility of the controller Article 25Data protection by design and by default Article 26Joint controllers Article 27Representatives of controllers or processors not established in the Union Article 28Processor Article 29Processing under the authority of the controller or processor Article 30Records of processing activities Article 31Cooperation with the supervisory authority Section 2Security … Continue reading Chapter 4 – Controller and processor}, language = {en-US}, urldate = {2023-08-23}, journal = {General Data Protection Regulation (GDPR)}, author = {European Council}, year = {2016}, } @misc{european_council_general_2016, title = {General {Data} {Protection} {Regulation} ({GDPR}) – {Official} {Legal} {Text} {Regulation} ({EU}) 2016/679}, url = {https://gdpr-info.eu/}, abstract = {General Data Protection Regulation (EU GDPR) – The official PDF of the Regulation (EU) 2016/679, its recitals \& key issues as a neatly arranged website.}, language = {en-US}, urldate = {2023-08-23}, journal = {General Data Protection Regulation (GDPR)}, author = {European Council}, year = {2016}, } @misc{ega_egacryptor_2023, title = {{EGACryptor}}, url = {https://ega-archive.org/submission/tools/egacryptor}, urldate = {2023-03-16}, author = {EGA}, year = {2023}, } @misc{health_legal_genomic_2018, title = {Genomic {Data} \& {Privacy} {Law}: {A} summary of {Health} {Legal}'s report for {Australian} {Genomics}}, url = 
{https://www.australiangenomics.org.au/wp-content/uploads/2021/09/Summary-Health-Legal-Report.pdf}, urldate = {2022-01-05}, author = {{Health Legal}}, year = {2018}, } @misc{oaic_chapter_2019, title = {Chapter 8: {APP} 8 {Cross}-border disclosure of personal information}, shorttitle = {Chapter 8}, url = {https://www.oaic.gov.au/privacy/australian-privacy-principles/australian-privacy-principles-guidelines/chapter-8-app-8-cross-border-disclosure-of-personal-information}, abstract = {APP 8 and s 16C create a framework for the cross-border disclosure of personal information. The framework generally requires an APP entity to ensure that an overseas recipient will handle an individual’s personal information in accordance with the APPs, and makes the APP entity accountable if the overseas recipient mishandles the information. This reflects a central object of the Privacy Act, of facilitating the free flow of information across national borders while ensuring that the privacy of individuals is respected (s 2A(f)).}, language = {en}, urldate = {2023-08-23}, journal = {OAIC}, author = {OAIC}, month = jul, year = {2019}, note = {Last Modified: 2023-06-07T11:16:22+10:00}, } @misc{ega_ega_2022, title = {{EGA} submitter portal tutorial {\textbar} {VEIS} - {Valorización} de {EGA} para la industria y la sociedad}, url = {https://veis.bsc.es/events/ega-submitter-portal-tutorial/}, urldate = {2023-03-16}, author = {EGA}, year = {2022}, } @techreport{australian_genomics_national_2022, title = {National {Approach} to {Genomic} {Information} {Management} ({NAGIM}) {Implementation} {Recommendations}}, url = {https://www.australiangenomics.org.au/wp-content/uploads/2021/06/NAGIM-Implementation-Recommendations-December-2022.pdf}, urldate = {2023-03-06}, author = {Australian Genomics}, month = dec, year = {2022}, } @article{haas_ctrl_2021, title = {'{CTRL}': an online, {Dynamic} {Consent} and participant engagement platform working towards solving the complexities of consent in genomic 
research.}, volume = {29}, url = {http://dx.doi.org/10.1038/s41431-020-00782-w}, doi = {10.1038/s41431-020-00782-w}, abstract = {The complexities of the informed consent process for participating in research in genomic medicine are well-documented. Inspired by the potential for Dynamic Consent to increase participant choice and autonomy in decision-making, as well as the opportunities for ongoing participant engagement it affords, we wanted to trial Dynamic Consent and to do so developed our own web-based application (web app) called CTRL (control). This paper documents the design and development of CTRL, for use in the Australian Genomics study: a health services research project building evidence to inform the integration of genomic medicine into mainstream healthcare. Australian Genomics brought together a multi-disciplinary team to develop CTRL. The design and development process considered user experience; security and privacy; the application of international standards in data sharing; IT, operational and ethical issues. The CTRL tool is now being offered to participants in the study, who can use CTRL to keep personal and contact details up to date; make consent choices (including indicate preferences for return of results and future research use of biological samples, genomic and health data); follow their progress through the study; complete surveys, contact the researchers and access study news and information. 
While there are remaining challenges to implementing Dynamic Consent in genomic research, this study demonstrates the feasibility of building such a tool, and its ongoing use will provide evidence about the value of Dynamic Consent in large-scale genomic research programs.}, number = {4}, urldate = {2021-11-30}, journal = {European Journal of Human Genetics}, author = {Haas, Matilda A and Teare, Harriet and Prictor, Megan and Ceregra, Gabi and Vidgen, Miranda E and Bunker, David and Kaye, Jane and Boughtwood, Tiffany}, month = apr, year = {2021}, keywords = {Genetics research, Patient education}, pages = {687--698}, } @inproceedings{basney_cilogon_2019, title = {{CILogon}: {Enabling} {Federated} {Identity} and {Access} {Management} for {Scientific} {Collaborations}}, volume = {351}, shorttitle = {{CILogon}}, url = {https://pos.sissa.it/351/031}, doi = {10.22323/1.351.0031}, abstract = {CILogon provides a software platform that enables scientists to work together to meet their identity and access management (IAM) needs more effectively so they can allocate more time and effort to their core mission of scientific research. CILogon builds on open source Shibboleth and COmanage software to provide an integrated IAM platform for science, federated worldwide via eduGAIN. CILogon serves the unique needs of research collaborations, namely to dynamically form collaboration groups across organizations and countries, sharing access to data, instruments, compute clusters, and other resources to enable scientific discovery. We operate CILogon via a software-as-a-service model to ease integration with a variety of science applications, while making all CILogon software components publicly available under open source licenses to enable re-use. Since CILogon operations began in 2010, our service has expanded from a federated X.509 certification authority (CA) to an OpenID Connect provider, SAML Attribute Authority, and multi-tenant collaboration platform. 
In this article, we describe the current CILogon system.},
  language   = {en},
  urldate    = {2023-08-22},
  booktitle  = {Proceedings of {International} {Symposium} on {Grids} \& {Clouds} 2019 — {PoS}({ISGC2019})},
  publisher  = {SISSA Medialab},
  author     = {Basney, Jim and Flanagan, Heather and Fleury, Terry and Gaynor, Jeff and Koranda, Scott and Oshrin, Benn},
  month      = nov,
  year       = {2019},
  pages      = {031},
}

% cBioPortal practical guide in Science Signaling.
% The item is the single locator pl1, so pages is given once instead of the
% degenerate range pl1--pl1. The publisher is stored in its own field rather
% than in a free-text note that styles would print verbatim.
@article{gao_integrative_2013,
  title     = {Integrative {Analysis} of {Complex} {Cancer} {Genomics} and {Clinical} {Profiles} {Using} the {cBioPortal}},
  volume    = {6},
  url       = {https://www.science.org/doi/10.1126/scisignal.2004088},
  doi       = {10.1126/scisignal.2004088},
  abstract  = {The cBioPortal for Cancer Genomics (http://cbioportal.org) provides a Web resource for exploring, visualizing, and analyzing multidimensional cancer genomics data. The portal reduces molecular profiling data from cancer tissues and cell lines into readily understandable genetic, epigenetic, gene expression, and proteomic events. The query interface combined with customized data storage enables researchers to interactively explore genetic alterations across samples, genes, and pathways and, when available in the underlying data, to link these to clinical outcomes. The portal provides graphical summaries of gene-level data from multiple platforms, network visualization and analysis, survival analysis, patient-centric queries, and software programmatic access. The intuitive Web interface of the portal makes complex cancer genomics profiles accessible to researchers and clinicians without requiring bioinformatics expertise, thus facilitating biological discoveries. Here, we provide a practical guide to the analysis and visualization features of the cBioPortal for Cancer Genomics.},
  number    = {269},
  urldate   = {2023-08-22},
  journal   = {Science Signaling},
  author    = {Gao, Jianjiong and Aksoy, Bülent Arman and Dogrusoz, Ugur and Dresdner, Gideon and Gross, Benjamin and Sumer, S. Onur and Sun, Yichao and Jacobsen, Anders and Sinha, Rileen and Larsson, Erik and Cerami, Ethan and Sander, Chris and Schultz, Nikolaus},
  month     = apr,
  year      = {2013},
  publisher = {American Association for the Advancement of Science},
  pages     = {pl1},
}

% Original cBio Cancer Genomics Portal paper in Cancer Discovery.
@article{cerami_cbio_2012,
  title      = {The {cBio} {Cancer} {Genomics} {Portal}: {An} {Open} {Platform} for {Exploring} {Multidimensional} {Cancer} {Genomics} {Data}},
  volume     = {2},
  issn       = {2159-8274},
  shorttitle = {The {cBio} {Cancer} {Genomics} {Portal}},
  url        = {https://doi.org/10.1158/2159-8290.CD-12-0095},
  doi        = {10.1158/2159-8290.CD-12-0095},
  abstract   = {The cBio Cancer Genomics Portal (http://cbioportal.org) is an open-access resource for interactive exploration of multidimensional cancer genomics data sets, currently providing access to data from more than 5,000 tumor samples from 20 cancer studies. The cBio Cancer Genomics Portal significantly lowers the barriers between complex genomic data and cancer researchers who want rapid, intuitive, and high-quality access to molecular profiles and clinical attributes from large-scale cancer genomics projects and empowers researchers to translate these rich data sets into biologic insights and clinical applications. Cancer Discov; 2(5); 401–4. ©2012 AACR.},
  number     = {5},
  urldate    = {2023-08-22},
  journal    = {Cancer Discovery},
  author     = {Cerami, Ethan and Gao, Jianjiong and Dogrusoz, Ugur and Gross, Benjamin E. and Sumer, Selcuk Onur and Aksoy, Bülent Arman and Jacobsen, Anders and Byrne, Caitlin J. and Heuer, Michael L. and Larsson, Erik and Antipin, Yevgeniy and Reva, Boris and Goldberg, Arthur P. 
and Sander, Chris and Schultz, Nikolaus},
  month = may,
  year  = {2012},
  pages = {401--404},
}

% Blog post on the Integrated DNA Technologies site.
% Misc entries ignore a journal field, so the site name is carried in
% howpublished where styles print it. The entry has no author, so key gives
% classic BibTeX styles something to sort and label by.
@misc{noauthor_global_nodate,
  key          = {Integrated DNA Technologies},
  title        = {A global look at population sequencing efforts},
  howpublished = {Integrated DNA Technologies},
  url          = {https://www.idtdna.com/pages/community/blog/post/a-global-look-at-population-sequencing-efforts},
  abstract     = {As the price of sequencing falls, nations big and small are embarking on NGS projects to investigate disease and build clinical capacity.},
  language     = {en},
  urldate      = {2023-08-22},
}

% Scientific Data repository guidance page.
% The page-title suffix (a vertical bar plus the site name) is moved out of
% the title into howpublished, and the ISSN gets its own field instead of note.
@misc{nature_data_2022,
  author       = {Nature},
  title        = {{Data} {Repository} {Guidance}},
  howpublished = {Scientific Data},
  copyright    = {©2022 Macmillan Publishers Limited. All Rights Reserved.},
  url          = {https://www.nature.com/sdata/policies/repositories},
  abstract     = {Data Repository Guidance},
  language     = {en},
  urldate      = {2022-08-31},
  year         = {2022},
  issn         = {2052-4463},
}

% bioRxiv preprint on GA4GH and healthcare genomics.
@misc{birney_genomics_2017,
  title      = {Genomics in healthcare: {GA4GH} looks to 2022},
  copyright  = {© 2017, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/},
  shorttitle = {Genomics in healthcare},
  url        = {https://www.biorxiv.org/content/10.1101/203554v1},
  doi        = {10.1101/203554},
  abstract   = {The Global Alliance for Genomics and Health (GA4GH), the standards-setting body in genomics for healthcare, aims to accelerate biomedical advancement globally. We describe the differences between healthcare- and research-driven genomics, discuss the implications of global, population-scale collections of human data for research, and outline mission-critical considerations in ethics, regulation, technology, data protection, and society. We present a crude model for estimating the rate of healthcare-funded genomes worldwide that accounts for the preparedness of each country for genomics, and infers a progression of cancer-related sequencing over time. 
We estimate that over 60 million patients will have their genome sequenced in a healthcare context by 2025. This represents a large technical challenge for healthcare systems, and a huge opportunity for research. We identify eight major practical, principled arguments to support the position that virtual cohorts of 100 million people or more would have tangible research benefits.},
  language     = {en},
  urldate      = {2023-05-11},
  publisher    = {bioRxiv},
  howpublished = {bioRxiv preprint},
  author       = {Birney, Ewan and Vamathevan, Jessica and Goodhand, Peter},
  month        = oct,
  year         = {2017},
}
% The entry closed above previously carried an exporter note reading
% "Pages: 203554 Section: New Results", which styles print verbatim. It is
% replaced by howpublished; the identifier 203554 remains in the doi and url.

% Terra support article. Misc entries ignore journal, so the site name is
% given in howpublished where styles print it.
@misc{cliffe_overview_2023,
  title        = {Overview: {Terra} on {Azure}},
  shorttitle   = {Overview},
  howpublished = {Terra Support},
  url          = {https://support.terra.bio/hc/en-us/articles/12028783864859-Overview-Terra-on-Azure},
  abstract     = {This article summarizes the cloud components you'll use when working in Terra, and how working in the cloud differs from working locally. This is a living document. Check back here to see the current state of Terra on Azure. Terra on Azure is a public preview release intended to allow users early to access tools and resources on Terra. Your candid feedback will help us improve the Terra experience as we develop and roll out additional functionality.},
  language     = {en-US},
  urldate      = {2023-05-23},
  author       = {Cliffe, Allie},
  month        = may,
  year         = {2023},
}

% Metadata crosswalk article in Scientific Data.
@article{bonisch_harvesting_2022,
  title      = {Harvesting metadata in clinical care: a crosswalk between {FHIR}, {OMOP}, {CDISC} and {openEHR} metadata},
  volume     = {9},
  copyright  = {2022 The Author(s)},
  issn       = {2052-4463},
  shorttitle = {Harvesting metadata in clinical care},
  url        = {https://www.nature.com/articles/s41597-022-01792-7},
  doi        = {10.1038/s41597-022-01792-7},
  abstract   = {Metadata describe information about data source, type of creation, structure, status and semantics and are prerequisite for preservation and reuse of medical data. 
To overcome the hurdle of disparate data sources and repositories with heterogeneous data formats a metadata crosswalk was initiated, based on existing standards. FAIR Principles were included, as well as data format specifications. The metadata crosswalk is the foundation of data provision between a Medical Data Integration Center (MeDIC) and researchers, providing a selection of metadata information for research design and requests. Based on the crosswalk, metadata items were prioritized and categorized to demonstrate that not one single predefined standard meets all requirements of a MeDIC and only a maximum data set of metadata is suitable for use. The development of a convergence format including the maximum data set is the anticipated solution for an automated transformation of metadata in a MeDIC.},
  language  = {en},
  number    = {1},
  urldate   = {2023-05-08},
  journal   = {Scientific Data},
  author    = {Bönisch, Caroline and Kesztyüs, Dorothea and Kesztyüs, Tibor},
  month     = oct,
  year      = {2022},
  publisher = {Nature Publishing Group},
  keywords  = {Health care, Public health},
  pages     = {659},
}
% The entry closed above previously had an exporter note reading
% "Number: 1 Publisher: Nature Publishing Group". The issue number is already
% in the number field, so only the publisher is kept, in its own field.

% HostSeq data resource description in BMC Genomic Data.
@article{yoo_hostseq_2023,
  title      = {{HostSeq}: a {Canadian} whole genome sequencing and clinical data resource},
  volume     = {24},
  issn       = {2730-6844},
  shorttitle = {{HostSeq}},
  url        = {https://doi.org/10.1186/s12863-023-01128-3},
  doi        = {10.1186/s12863-023-01128-3},
  abstract   = {HostSeq was launched in April 2020 as a national initiative to integrate whole genome sequencing data from 10,000 Canadians infected with SARS-CoV-2 with clinical information related to their disease experience. The mandate of HostSeq is to support the Canadian and international research communities in their efforts to understand the risk factors for disease and associated health outcomes and support the development of interventions such as vaccines and therapeutics. HostSeq is a collaboration among 13 independent epidemiological studies of SARS-CoV-2 across five provinces in Canada. 
Aggregated data collected by HostSeq are made available to the public through two data portals: a phenotype portal showing summaries of major variables and their distributions, and a variant search portal enabling queries in a genomic region. Individual-level data is available to the global research community for health research through a Data Access Agreement and Data Access Compliance Office approval. Here we provide an overview of the collective project design along with summary level information for HostSeq. We highlight several statistical considerations for researchers using the HostSeq platform regarding data aggregation, sampling mechanism, covariate adjustment, and X chromosome analysis. In addition to serving as a rich data source, the diversity of study designs, sample sizes, and research objectives among the participating studies provides unique opportunities for the research community.}, language = {en}, number = {1}, urldate = {2023-05-02}, journal = {BMC Genomic Data}, author = {Yoo, S. and Garg, E. and Elliott, LT and Hung, RJ and Halevy, AR and Brooks, JD and Bull, SB and Gagnon, F. and Greenwood, CMT and Lawless, JF and Paterson, AD and Sun, L. and Zawati, MH and Lerner-Ellis, J. and Abraham, RJS and Birol, I. and Bourque, G. and Garant, J-M and Gosselin, C. and Li, J. and Whitney, J. and Thiruvahindrapuram, B. and Herbrick, J-A and Lorenti, M. and Reuter, MS and Adeoye, OO and Liu, S. and Allen, U. and Bernier, FP and Biggs, CM and Cheung, AM and Cowan, J. and Herridge, M. and Maslove, DM and Modi, BP and Mooser, V. and Morris, SK and Ostrowski, M. and Parekh, RS and Pfeffer, G. and Suchowersky, O. and Taher, J. and Upton, J. and Warren, RL and Yeung, RSM and Aziz, N. and Turvey, SE and Knoppers, BM and Lathrop, M. 
and Jones, SJM and Scherer, SW and Strug, LJ},
  month    = may,
  year     = {2023},
  keywords = {COVID-19, Clinical databank, Host genetics, SARS-CoV-2, Whole genome sequencing},
  pages    = {26},
}

% GHGA consent-module white paper deposited on Zenodo.
% It was typed as an article but has no journal, so article styles warn about
% the missing required field. It is typed as a techreport with
% institution Zenodo, matching the other Zenodo reports in this file, and the
% "Publisher: Zenodo" note is folded into institution.
% The type label follows the abstract, which calls the document a whitepaper.
@techreport{bruns_consent_2022,
  type        = {White paper},
  title       = {Consent {Modules} for {Data} {Sharing} via the {German} {Human} {Genome}-{Phenome} {Archive} ({GHGA})},
  url         = {https://zenodo.org/record/6828131},
  abstract    = {The German Human Genome-Phenome Archive (GHGA) is a research consortium which is currently establishing a federated data infrastructure allowing the secure storage of and controlled access to omics and related health data consented for scientific research use. This whitepaper contains guidance for Data Submitters on how to update their consent forms such that data may be shared via GHGA for secondary research use.},
  language    = {eng},
  urldate     = {2023-04-28},
  institution = {Zenodo},
  author      = {Bruns, Andreas and Benet-Pages, Anna and Eufinger, Jan and Graessner, Holm and Kohlbacher, Oliver and Molnár-Gábor, Fruzsina and Parker, Simon and Schickhardt, Christoph and Stegle, Oliver and Winkler, Eva},
  month       = jul,
  year        = {2022},
  keywords    = {Data sharing, Informed consent, Omics archive},
}

% JMIR viewpoint on European genomic data sharing.
@article{molnar-gabor_bridging_2022,
  title    = {Bridging the {European} {Data} {Sharing} {Divide} in {Genomic} {Science}},
  volume   = {24},
  url      = {https://www.jmir.org/2022/10/e37236},
  doi      = {10.2196/37236},
  abstract = {In this viewpoint, we argue for the importance of creating data spaces for genomic research that are detached from contexts in which fundamental rights concerns related to surveillance measures override a purpose-specific balancing of fundamental rights. Genomic research relies on molecular and phenotypic data, on comparing findings within large data sets, on searchable metadata, and on translating research results into a clinical setting. These methods require sensitive genetic and health data to be shared across borders. 
International data sharing between the European Union (EU) or the European Economic Area and third countries has accordingly become a cornerstone of genomics. The EU General Data Protection Regulation contains rules that accord privileged status to data processing for research purposes to ensure that strict data protection requirements do not impede biomedical research. However, the General Data Protection Regulation rules applicable to international transfers of data accord no such preferential treatment to international data transfers made in the research context. The rules that govern the international transfer of data create considerable barriers to international data sharing because of the cost-intensive procedural and substantive compliance burdens that they impose. For certain jurisdictions and select use cases, there exist practically no lawful mechanisms to enable the international transfer of data because of concerns about the protection of fundamental rights. The proposed solutions further fail to address the need to share large data sets of local and regional cohorts across national borders to enable joint analyses. The European Health Data Space is an emerging federated, EU-wide data infrastructure that is intended to function as an infrastructure bringing together EU health data to improve patient care and enable the secondary use of health-related data for research purposes. Such infrastructure is implementing new institutions to support its functioning and is being implemented in reliance on a new enabling law, the regulation on the European Health Data Space. This innovation provides the opportunity to facilitate EU contribution to international genomic research efforts. The draft regulation for this data space provides for a concept of data infrastructure intended to enable cross-border data exchange and access, including access to genetic and health data for scientific analysis purposes. 
The draft regulation also provides for obligations of national actors aimed at making data widely available. This effort is laudable. However, in the absence of further, more fundamental changes to the manner in which the EU regulates the secondary use of health data, it is reasonable to believe that EU participation in international genomic research efforts will remain impeded.},
  language  = {en},
  number    = {10},
  urldate   = {2023-04-28},
  journal   = {Journal of Medical Internet Research},
  author    = {Molnár-Gábor, Fruzsina and Beauvais, Michael J. S. and Bernier, Alexander and Jimenez, Maria Pilar Nicolas and Recuero, Mikel and Knoppers, Bartha Maria},
  month     = oct,
  year      = {2022},
  publisher = {JMIR Publications Inc.},
  address   = {Toronto, Canada},
  pages     = {e37236},
}
% The entry closed above previously had an exporter note repeating the journal
% name under Company, Distributor, Institution and Label. Only the publisher
% and its city carried information, so they are kept as publisher and address.
% The language code is lower-cased to match the rest of the file.

% IBM Aspera product page. The region subtag is upper-cased (en-US) to match
% the other language codes in the file.
@misc{ibm_aspera_2023,
  title    = {Aspera},
  url      = {https://www.ibm.com/products/aspera},
  abstract = {With IBM Aspera, transfer, send and share large files and big data, fast and securely, over any distance.},
  language = {en-US},
  urldate  = {2023-04-27},
  author   = {IBM},
  month    = apr,
  year     = {2023},
}

% Federated EGA node operations guidelines.
% Techreport entries require institution. The value here is inferred from the
% ega-archive.org URL and the title; confirm the preferred wording.
@techreport{kerry_federated_2022,
  title       = {Federated {EGA} {Node} {Operations} {Guidelines}},
  url         = {https://ega-archive.org/files/EGA-Node-Operations-v2.pdf},
  language    = {en},
  urldate     = {2023-04-24},
  institution = {European Genome-phenome Archive ({EGA})},
  author      = {Kerry, Giselle and Keane, Thomas and Rambla, Jordi and Spalding, Dylan and Flicek, Paul and Parkinson, Helen and Freeberg, Mallory},
  year        = {2022},
}

% NeIC Sensitive Data Archive operations handbook (online documentation).
@misc{neic_neic_2023,
  title   = {{NeIC} {SDA} {Operations} handbook},
  url     = {https://neic-sda.readthedocs.io/en/latest/},
  urldate = {2023-04-24},
  author  = {NeIC},
  year    = {2023},
}

% Qualitative study of Australian public attitudes to genomic data sharing.
@article{warren_context_2023,
  title      = {Context matters in genomic data sharing: a qualitative investigation into responses from the {Australian} public},
  volume     = {15},
  issn       = {1755-8794},
  shorttitle = {Context 
matters in genomic data sharing},
  url        = {https://doi.org/10.1186/s12920-023-01452-8},
  doi        = {10.1186/s12920-023-01452-8},
  abstract   = {Understanding public attitudes to genomic data sharing is widely seen as key in shaping effective governance. However, empirical research in this area often fails to capture the contextual nuances of diverse sharing practices and regulatory concerns encountered in real-world genomic data sharing. This study aimed to investigate factors affecting public attitudes to data sharing through responses to diverse genomic data sharing scenarios.},
  number     = {3},
  urldate    = {2023-04-20},
  journal    = {BMC Medical Genomics},
  author     = {Warren, Vanessa and Critchley, Christine and McWhirter, Rebekah and Walshe, Jarrod and Nicol, Dianne},
  month      = apr,
  year       = {2023},
  keywords   = {Benefit sharing, Commercialization, Future use, Genetic data, Genomic data sharing, Governance, Public attitudes},
  pages      = {275},
}

% Research ethics versus GDPR: informed consent in health data research.
@article{gefenas_controversies_2022,
  title      = {Controversies between regulations of research ethics and protection of personal data: informed consent at a cross-road},
  volume     = {25},
  issn       = {1572-8633},
  shorttitle = {Controversies between regulations of research ethics and protection of personal data},
  url        = {https://doi.org/10.1007/s11019-021-10060-1},
  doi        = {10.1007/s11019-021-10060-1},
  abstract   = {This paper explores some key discrepancies between two sets of normative requirements applicable to the research use of personal data and human biological materials: (a) the data protection regime which follows the application of the European Union General Data Protection Regulation (GDPR), and (b) the Declaration of Helsinki, CIOMS guidelines and other research ethics regulations. 
One source of this controversy is that the GDPR requires consent to process personal data to be clear, concise, specific and granular, freely given and revocable and therefore has challenged the concept of ‘broad consent’, which has been widely applied in the context of biobanking. Another source of controversy is the interplay between regulations of research ethics and protection of personal data related to the secondary use of personal data and biological materials. In this case, the GDPR ‘research condition’ provides an alternative to re-consent for the use of previously collected personal data and biological materials. Although the mentioned controversies have been raised in the legal literature, they have not been explicitly addressed from the research ethics perspective. Should consent be regarded as a priority legal basis for personal data processing in health data research? Can broad consent still be a suitable legal ground for biobanking? What should be the role of research ethics provisions that differ from the GDPR standards, and what should be the role and function of research ethics committees in the changing environment of health data research? These are the ongoing controversies to be explored in the paper.}, language = {en}, number = {1}, urldate = {2023-04-20}, journal = {Medicine, Health Care and Philosophy}, author = {Gefenas, Eugenijus and Lekstutiene, J. and Lukaseviciene, V. and Hartlev, M. and Mourby, M. 
and {Ó Cathaoir}, K.},
  month    = mar,
  year     = {2022},
  keywords = {General data protection regulation, Informed consent, Research ethics, Research ethics committee},
  pages    = {23--30},
}
% The last author of the entry closed above was exported as "Cathaoir, K.Ó",
% which makes the surname particle part of the given-name initials. The
% surname is "Ó Cathaoir", braced so it is parsed and sorted as one unit.

% Frontiers in Medicine study on data altruism in clinical research.
% The DOI is taken from the article URL. The structured-abstract headings
% (Background, Methods, Results) were fused into the neighbouring words by the
% exporter and are separated again.
@article{lalova-spinks_application_2023,
  title    = {The application of data altruism in clinical research through empirical and legal analysis lenses},
  volume   = {10},
  issn     = {2296-858X},
  url      = {https://www.frontiersin.org/articles/10.3389/fmed.2023.1141685},
  doi      = {10.3389/fmed.2023.1141685},
  abstract = {Background: The legal framework for clinical research in the EU is complex and the lack of harmonization of the relevant legal and ethical rules remains one of the main challenges for stakeholders in the field. The recently adopted Data Governance Act (DGA) and the proposal for a European Health Data Space (EHDS) promise to solve the existing challenges with respect to access to and (re)use of personal data for research, but also risk to further complexify the field. The DGA introduced a novel mechanism – data altruism. Data altruism is understood as the voluntary sharing of personal and non-personal data, based on the consent of data subjects or the permission of natural and legal persons, without seeking a reward and for objectives of general interest. This study aimed to gain insights into the opinion of clinical research stakeholders on data altruism, and to critically discuss key issues pertaining to the application of data altruism from a legal point of view. Methods: Semi-structured interviews with (1) data protection officers (DPOs) and legal experts working with commercial and academic sponsors of clinical trials, (2) investigators, and (3) members of research ethics committees. Data underwent framework analysis. The legal discussion was comprised of legal doctrinal research with focus on the DGA, EHDS proposal, and the interplay with the EU General Data Protection Regulation (GDPR). Results: Fourteen experts took part in the interviews, more than half of which were DPOs/legal experts. 
Interviewees were based in seven EU Member states and the United Kingdom. The majority of participants were critical towards the data altruism mechanism and pointed out challenges and risks associated with its application. Conclusion: Although data altruism holds the potential to facilitate data sharing, its application in clinical research at the moment is still riddled with uncertainties. The interplay of the DGA rules with the provisions of the GDPR and the EHDS proposal are insufficiently clear and further efforts from the legislator are required to build a working, patient-centered, and research fostering data altruism system.},
  urldate  = {2023-04-20},
  journal  = {Frontiers in Medicine},
  author   = {Lalova-Spinks, Teodora and Meszaros, Janos and Huys, Isabelle},
  year     = {2023},
}
% In the abstract closed above, the fused "Conclusion" heading is separated
% from the neighbouring words.

% ICLG data-protection guide for Australia.
% The author is an organisation. Written as "Group, Global Legal" it is parsed
% as a person with surname Group, so it is double-braced as a single name.
% Misc entries ignore journal, so the site name moves to howpublished.
% The exporter note is split into publisher and address fields.
@misc{group_international_nodate,
  type         = {Text},
  author       = {{Global Legal Group}},
  title        = {International {Comparative} {Legal} {Guides}},
  howpublished = {International Comparative Legal Guides International Business Reports},
  url          = {https://iclg.com/practice-areas/data-protection-laws-and-regulations/australia},
  abstract     = {Data Protection Laws and Regulations covering issues in Australia of Relevant Legislation and Competent Authorities, Definitions, Territorial Scope},
  language     = {en},
  urldate      = {2023-04-14},
  publisher    = {Global Legal Group},
  address      = {United Kingdom},
}

% phenopacket-schema online documentation.
@misc{noauthor_welcome_nodate,
  title   = {Welcome to the documentation for the phenopacket-schema! 
— phenopacket-schema 2.0 documentation},
  url     = {https://phenopacket-schema.readthedocs.io/en/latest/},
  urldate = {2023-03-29},
}

% HGPP virtual cohort assembly discovery-phase report (Zenodo record).
@techreport{cowley_virtual_2022,
  title      = {Virtual {Cohort} {Assembly} {Discovery} {Phase} {Report}: {National} {Community} {Needs} \& {Candidate} {Solutions}},
  shorttitle = {Virtual {Cohort} {Assembly} {Discovery} {Phase} {Report}},
  url        = {https://zenodo.org/record/7439886},
  abstract   = {The Human Genomes Platform Project (HGPP) is a nationally-funded collaborative research project aiming to enhance capability for securely and responsibly sharing human genomics research data.  National and international connectivity will maximise the utility of these sensitive and valuable assets. The partners on the project represent many of the largest human genome sequencing and analysis efforts in Australia. Currently there is no way to identify virtual cohorts of individuals who have had their genomes sequenced nationally as it is not possible to query across the separate assets from each participating genomics repository. This work aims to implement a system that can be used to identify cohorts of individuals and related data assets across the repositories located at each of the partner institutes (i.e., UMCCR/Australian Genomics, QIMRB, ZERO/CCIA, Garvan and NCI) The initial focus of the virtual cohorts sub-project within the HGPP was a knowledge discovery and recording phase to define: the current state of cross-institutional human genomic data querying in Australia the set of problems that need to be addressed  key stakeholders and their (likely) requirements. 
As such, this document records: the current state of processes and tools for virtual cohort querying national community needs candidate solutions to enable cross-institutional virtual cohort querying recommendations on preferred technology and proposed implementation architecture This document will be used as a reference to plan the pilot for a system that addresses prioritised requirements to create a Minimum Viable Product (MVP). The primary audiences for this document include the HGPP sub-project team, other HGPP stakeholders, and the project reference group.},
  language    = {eng},
  urldate     = {2023-03-29},
  institution = {Zenodo},
  author      = {Cowley, Mark and Downton, Matthew and Holliday, Jessica and Kummerfeld, Sarah and Leonard, Conrad and Lin, Angela and Pope, Bernard and San Kho Lin, Victor and Ravishankar, Shyamsunder and Shadbolt, Marion and Syed, Mustafa and Taouk, Kamile and Wong-Erasmus, Marie},
  month       = dec,
  year        = {2022},
  doi         = {10.5281/zenodo.7439886},
  keywords    = {GA4GH, beacon, beacon network, beacon v2, cohort, data querying, data sharing, harmonisation},
}

% HGPP federated identity and access management discovery-phase report.
@techreport{carnuccio_human_2022,
  title      = {Human {Genomes} {Platform} {Project}: {Federated} {Identity} and {Access} {Management} ({IAM}) {Discovery} {Phase} {Report}},
  shorttitle = {Human {Genomes} {Platform} {Project}},
  url        = {https://zenodo.org/record/6644009},
  abstract   = {The Human Genomes Platform Project (HGPP) is an Australian nationally-funded collaborative research project aiming to enhance capability for securely and responsibly sharing human genomics research data.  National and international connectivity will maximise the utility of these sensitive and valuable assets.  At the heart of any technology platform is identity and access management (IAM): a collection of standards, policies and technologies that enable a platform to determine whether to permit access to a user. 
In a federated environment such as the Australian/global genomics community, IAM is the glue that enables loosely coupled systems to establish strong trust relationships for the purposes of data sharing.  The initial focus of the Federated IAM sub-project team was a discovery and recording phase to define the current state of identity and access management in the community, the set of problems that need to be addressed, and key stakeholders and their (likely) requirements. The Federated IAM Discovery Phase Report (this document) records: the current state of processes and tools for identity and access management across the community, national community needs, gap analysis, and identification of international projects with components suitable to canvas and potentially pilot. This document will be used as a reference to plan the pilot for a system that addresses prioritised requirements to create a Minimum Viable Product (MVP). The Australian human genome research community has many connections to consortia in Europe and the USA, but retains the need to work independently, according to our own national requirements and guidelines. 
International readers, particularly those who are outside Europe and the US, may find this discovery report useful for reference in their own human genome research data sharing program efforts.},
  urldate     = {2023-03-29},
  institution = {Zenodo},
  author      = {Carnuccio, Patrick and Cowley, Mark and Davies, Kylie and Downton, Matthew and Dumevska, Biljana and Holliday, Jessica and Kummerfeld, Sarah and Lin, Angela and Monro, David and Patterson, Andrew and Pope, Bernie and Ravishankar, Shyamsunder and Robinson, Andrew and Scullen, John and Shadbolt, Marion and Syed, Mustafa and Wood, Scott and Wong-Erasmus, Marie},
  month       = jun,
  year        = {2022},
  doi         = {10.5281/zenodo.6644009},
  keywords    = {Attribute aggregation, Authentication and authorization infrastructure (AAI), CILogon, Data sharing, ELIXIR AAI, Federated identity management, Human Genomics, Identity and Access Management (IAM), Management actions, Security \& privacy, User policy},
}

% HGPP DAC automation discovery-phase report (Zenodo record).
@techreport{carnuccio_human_2022-1,
  title      = {Human {Genomes} {Platform} {Project}: {DAC} {Automation} {Discovery} {Phase} {Report}},
  shorttitle = {Human {Genomes} {Platform} {Project}},
  url        = {https://zenodo.org/record/6644050},
  abstract   = {The Human Genomes Platform Project (HGPP) is an Australian nationally-funded collaborative research project aiming to enhance capability for securely and responsibly sharing human genomics research data. National and international connectivity will maximise the utility of these sensitive and valuable assets.  A major challenge to human genome data sharing is navigating restrictions on secondary use. Decisions on how and to whom to grant access to data require significant human effort by DAC (Data Access) Committees. This manual approach is slow and burdensome. The aims of the DAC Automation sub-project are to explore a new data access request and approval paradigm driven by automation for the national human genome research community. 
The initial focus of the DAC Automation sub-project team was a discovery and recording phase to define the current state of data access requests and data sharing agreements within the community, the set of problems that need to be addressed, and key sub-project areas and their (likely) requirements. The DAC Automation Discovery Phase Report (this document) records: the current state of processes and tools for data access requests and data sharing across the community, national community needs, gap analysis, and identification of international projects with potential solution components for piloting in later project stages. This document will be the reference for planning the pilot for a system that addresses prioritised requirements to create a Minimum Viable Product (MVP). The Australian human genome research community has many connections to consortia in Europe and the USA, but retains the need to work independently, according to our own national requirements and guidelines. International readers, particularly those who are outside Europe and the US, may find this discovery report useful for reference in their own human genome research data sharing program efforts.},
  urldate     = {2023-03-29},
  institution = {Zenodo},
  author      = {Carnuccio, Patrick and Cowley, Mark and Davies, Kylie and Druken, Kelsey and Holliday, Jessica and Kummerfeld, Sarah and Monro, David and Patterson, Andrew and Pearson, John and Pope, Bernie and Scullen, John and Shadbolt, Marion and Wong-Erasmus, Marie and Wood, Scott},
  month       = jun,
  year        = {2022},
  doi         = {10.5281/zenodo.6644050},
  keywords    = {Automation, BAM, DUOS, Data Access Application, Data Access Committee, Data Access Control, Data Access Requests, Data Policy, Data Sharing, Ethics, Governance, Human Genomics, Principal Investigator, REMS, Research, Technology Infrastructure, Technology platform},
}
% In the abstract closed above, the fused sentence boundary after
% "later project stages." is given its missing space.

% Recorded BOSC 2022 talk on the GA4GH Phenopacket schema (video).
@misc{munoz-torres_bosc2022_2022,
  title    = {{BOSC2022} {S3ba} {Monica} {Munoz} {Torres} {The} {GA4GH} {Phenopacket} schema, {A} computable 
representation of clini}, url = {https://www.youtube.com/watch?v=Qvnmz_wIggg}, abstract = {Presented at BOSC 2022, part of ISMB, in Madison, WI}, urldate = {2023-03-23}, author = {Munoz-Torres, Monica}, month = oct, year = {2022}, }

% Australian government and agency reports.
% Corporate authors are wrapped in an extra pair of braces so that BibTeX
% treats the whole name as one indivisible unit instead of splitting it at
% the comma into "Last, First".
% techreport entries carry the issuing body in the institution field.

% NOTE(review): shares its URL with
% department_of_industry_science_and_resources_australias_2017 below;
% possibly a second capture of the same page. Confirm before merging.
@misc{department_of_industry_australias_2022,
  type = {Strategy or plan},
  title = {Australia’s {National} {Science} {Statement}},
  url = {https://www.industry.gov.au/publications/australias-national-science-statement},
  abstract = {Australia’s National Science Statement sets a long-term approach to achieving a strong science system and provides guidance for government investment and decision making.},
  language = {en-AU},
  urldate = {2023-03-09},
  internal-note = {Site node URL, previously exported in the journal field: https://www.industry.gov.au/node/75715},
  author = {{Department of Industry, Science and Resources}},
  month = sep,
  year = {2022},
}

@techreport{amrab_australian_2022,
  type = {Report},
  title = {Australian {Medical} {Research} and {Innovation} {Priorities} 2022–2024},
  shorttitle = {[{AMRAB} 2022]},
  url = {https://www.health.gov.au/resources/publications/australian-medical-research-and-innovation-priorities-2022-2024?language=en},
  abstract = {The Australian Medical Research and Innovation Priorities 2022-2024 are the fourth and current set of Priorities for the Medical Research Future Fund (MRFF).},
  language = {en},
  urldate = {2023-03-09},
  author = {{AMRAB}},
  institution = {Australian Government Department of Health and Aged Care},
  month = nov,
  year = {2022},
}

@techreport{austrade_clinical_2018,
  title = {Clinical {Trials} {Capability} {Report}},
  url = {https://www.austrade.gov.au/ArticleDocuments/2814/Clincal-Trials-Capability-Report.pdf.aspx},
  urldate = {2023-03-06},
  author = {{AusTrade}},
  institution = {Australian Trade and Investment Commission (Austrade)},
  year = {2018},
}

@techreport{department_of_education_skills_and_employment_2021_2022,
  type = {Report},
  title = {2021 {National} {Research} {Infrastructure} {Roadmap}},
  url = {https://www.education.gov.au/national-research-infrastructure/resources/2021-national-research-infrastructure-roadmap},
  abstract = {The 2021 National Research Infrastructure Roadmap identifies needs and sets priorities for future investment in Australia's national research infrastructure. It will guide the 2022 Research Infrastructure Investment Plan, and seeks to maintain Australian excellence in research and innovation and support Australia’s ability to address emerging research challenges.  This document is the accessible.doc version of the 2021 National Research Infrastructure Roadmap.},
  language = {en-AU},
  urldate = {2023-03-06},
  author = {{Department of Education, Skills and Employment}},
  institution = {Department of Education, Skills and Employment},
  month = apr,
  year = {2022},
}

@techreport{aihw_health_2022,
  title = {Health expenditure {Australia} 2020–21},
  url = {https://www.aihw.gov.au/reports/health-welfare-expenditure/health-expenditure-australia-2020-21/contents/about},
  abstract = {Regular reporting of national health expenditure is vital to understanding the health system and its relationship to the economy as a whole. Total health spending was \$220.9 billion, equating to \$8...},
  language = {en-AU},
  urldate = {2023-03-06},
  author = {{AIHW}},
  institution = {Australian Institute of Health and Welfare},
  month = nov,
  year = {2022},
}

@techreport{amrab_australian_2021,
  type = {Report},
  title = {Australian {Medical} {Research} and {Innovation} {Strategy} 2021–2026},
  url = {https://www.health.gov.au/resources/publications/australian-medical-research-and-innovation-strategy-2021-2026?language=en},
  abstract = {The Australian Medical Research and Innovation Strategy 2021-2026 sets out the vision, aim and strategic objectives of the Medical Research Future Fund (MRFF).},
  language = {en},
  urldate = {2023-03-06},
  author = {{AMRAB}},
  institution = {Australian Government Department of Health and Aged Care},
  month = nov,
  year = {2021},
}

@techreport{department_of_industry_science_and_resources_australias_2017,
  type = {Strategy or plan},
  title = {Australia’s {National} {Science} {Statement}},
  url = {https://www.industry.gov.au/publications/australias-national-science-statement},
  abstract = {Australia’s National Science Statement sets a long-term approach to achieving a strong science system and provides guidance for government investment and decision making.},
  language = {en-AU},
  urldate = {2023-03-06},
  author = {{Department of Industry, Science and Resources}},
  institution = {Department of Industry, Science and Resources},
  month = jan,
  year = {2017},
}

@techreport{department_of_industry_science_and_resources_australias_2015,
  type = {Strategy or plan},
  title = {Australia's {Science} and {Research} {Priorities}},
  url = {https://www.industry.gov.au/publications/australias-science-and-research-priorities-2015},
  abstract = {The Australian Government’s Science and Research Priorities 2015 identified areas of immediate and critical importance to the nation and our place in the world.},
  language = {en-AU},
  urldate = {2023-03-06},
  author = {{Department of Industry, Science and Resources}},
  institution = {Department of Industry, Science and Resources},
  month = may,
  year = {2015},
}

@misc{abs_caring_2022,
  title = {A caring nation – 15 per cent of {Australia}’s workforce in {Health} {Care} and {Social} {Assistance} industry},
  url = {https://www.abs.gov.au/media-centre/media-releases/caring-nation-15-cent-australias-workforce-health-care-and-social-assistance-industry},
  language = {en},
  urldate = {2023-03-06},
  author = {{ABS}},
  month = dec,
  year = {2022},
}

@techreport{department_of_industry_science_and_resources_national_2022,
  title = {National {Reconstruction} {Fund}: consultation paper},
  url = {https://consult.industry.gov.au/national-reconstruction-fund},
  abstract = {The Australian Government is establishing the \$15 billion NRF to diversify and transform Australia's industry and economy. By establishing the NRF the government is helping to create secure, well-paid jobs; secure Australia’s future prosperity; and drive sustainable economic growth. The NRF will invest across the 7 priority areas outlined below: renewables and low emissions technologies; medical science; transport; value-add in the agriculture, forestry and fisheries sectors; value-add in resources; defence capability; enabling capabilities. It will provide finance (including loans, guarantees and equity) to drive investments that add value and develop capability. The NRF will operate commercially to deliver a positive rate of return. It will be governed by a board who will make independent investment decisions guided by an investment mandate.},
  urldate = {2023-03-06},
  author = {{Department of Industry, Science and Resources}},
  institution = {Department of Industry, Science and Resources},
  year = {2022},
}

% NOTE(review): institution is taken from the commissioning body named in the
% abstract; the report was produced by TEConomy Partners, LLC. Confirm which
% body should be credited.
@techreport{tripp_economic_2021,
  title = {The {Economic} {Impact} and {Functional} {Applications} of {Human} {Genetics} and {Genomics}},
  url = {https://www.ashg.org/wp-content/uploads/2021/05/ASHG-TEConomy-Impact-Report-Final.pdf},
  abstract = {Commissioned by the American Society of Human Genetics. Produced by TEConomy Partners, LLC.},
  language = {en},
  author = {Tripp, Simon and Grueber, Martin},
  institution = {American Society of Human Genetics},
  month = may,
  year = {2021},
}

@misc{rosinach_mapping_2022, title = {Mapping {OHDSI} {OMOP} {Common} {Data} {Model} and {GA4GH} {Phenopackets} for {COVID}-19 disease epidemics and analytics}, url = {https://biohackrxiv.org/ep3xh/}, doi = {10.37044/osf.io/ep3xh}, abstract = {The COVID-19 crisis demonstrates a critical requirement for rapid and efficient sharing of data to facilitate the global response to this and future pandemics. Our project aims are to enhance interoperability between health and research data by mapping Phenopackets and OMOP schemas, and representing COVID-19 metadata using the FAIR principles to enable discovery, integration and analysis of genotypic and phenotypic data.
Here, we present our outcomes after one week of BioHacking together 17 participants (10 new to the project), from different countries (CH, US and in EU), and continents.},
  language = {en-us},
  urldate = {2023-02-14},
  publisher = {BioHackrXiv},
  author = {Queralt Rosinach, Núria and Alarcón Moreno, Pablo and Callahan, Tiffany and Delussu, Giovanni and Fraboulet, Charlotte and Jacobsen, Jules and Castro, Leyla Jael and Kaliyaperumal, Rajaram and Kulmanov, Maxat and Robinson, Peter and Satagopam, Venkata and Siapos, Anastasios and Touré, Vasundra and Welter, Danielle},
  month = nov,
  year = {2022},
  keywords = {COVID-19, FAIR, Federated learning, Health data, Life Sciences, Machine learning, Medicine and Health Sciences, OMOP, Phenopackets, Semantic Web},
}

% Beacon v2 implementation papers and posters.
% The first two authors of the entry above have compound surnames, so both
% surname words are placed before the comma.

@article{repchevsky_open_2022,
  title = {Open source {Java} implementation of the {Beacon} v2 {API}},
  volume = {11},
  url = {https://f1000research.com/posters/11-610},
  doi = {10.7490/f1000research.1118980.1},
  abstract = {Read this work by Repchevsky D, at F1000Research.},
  urldate = {2023-02-14},
  journal = {F1000Research},
  author = {Repchevsky, Dmitry and Capella-Gutierrez, Salvador and Gelpí, Josep L.},
  month = jun,
  year = {2022},
}

@article{rueda_beacon_2022,
  title = {Beacon {V2} {Reference} {Implementation}: a {Toolkit} to enable federated sharing of genomic and phenotypic data},
  issn = {1367-4803},
  shorttitle = {Beacon {V2} {Reference} {Implementation}},
  url = {https://doi.org/10.1093/bioinformatics/btac568},
  doi = {10.1093/bioinformatics/btac568},
  abstract = {Beacon v2 is an API specification established by the Global Alliance for Genomics and Health initiative (GA4GH) that defines a standard for federated discovery of genomic and phenotypic data. Here we present the Beacon v2 Reference Implementation (B2RI), a set of open-source software tools that allow lighting up a local Beacon instance “out-of-the-box”. Along with the software, we have created detailed “Read the Docs” documentation that includes information on deployment and installation. The B2RI is released under GNU General Public License v3.0 and Apache License v2.0. Documentation and source code is available at: https://b2ri-documentation.readthedocs.io. Supplementary data are available at Bioinformatics online.},
  urldate = {2022-08-24},
  journal = {Bioinformatics},
  author = {Rueda, Manuel and Ariosa, Roberto and Moldes, Mauricio and Rambla, Jordi},
  month = aug,
  year = {2022},
  pages = {btac568},
}

@article{rambla_beacon_2022, title = {Beacon v2 and {Beacon} networks: {A} “lingua franca” for federated data discovery in biomedical genomics, and beyond}, volume = {43}, issn = {1098-1004}, shorttitle = {Beacon v2 and {Beacon} networks}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/humu.24369}, doi = {10.1002/humu.24369}, abstract = {Beacon is a basic data discovery protocol issued by the Global Alliance for Genomics and Health (GA4GH). The main goal addressed by version 1 of the Beacon protocol was to test the feasibility of broadly sharing human genomic data, through providing simple “yes” or “no” responses to queries about the presence of a given variant in datasets hosted by Beacon providers. The popularity of this concept has fostered the design of a version 2, that better serves real-world requirements and addresses the needs of clinical genomics research and healthcare, as assessed by several contributing projects and organizations. Particularly, rare disease genetics and cancer research will benefit from new case level and genomic variant level requests and the enabling of richer phenotype and clinical queries as well as support for fuzzy searches. Beacon is designed as a “lingua franca” to bridge data collections hosted in software solutions with different and rich interfaces.
Beacon version 2 works alongside popular standards like Phenopackets, OMOP, or FHIR, allowing implementing consortia to return matches in beacon responses and provide a handover to their preferred data exchange format. The protocol is being explored by other research domains and is being tested in several international projects.}, language = {en}, number = {6}, urldate = {2022-08-31}, journal = {Human Mutation}, author = {Rambla, Jordi and Baudis, Michael and Ariosa, Roberto and Beck, Tim and Fromont, Lauren A. and Navarro, Arcadi and Paloots, Rahel and Rueda, Manuel and Saunders, Gary and Singh, Babita and Spalding, John D. and Törnroos, Juha and Vasallo, Claudia and Veal, Colin D. and Brookes, Anthony J.}, year = {2022}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/humu.24369}, keywords = {Beacon, GA4GH, REST API, clinical genomics, data discovery, data sharing}, pages = {791--799}, } @article{lynch_australian_2023, title = {Australian public perspectives on genomic data storage and sharing: {Benefits}, concerns and access preferences}, volume = {66}, issn = {1769-7212}, shorttitle = {Australian public perspectives on genomic data storage and sharing}, url = {https://www.sciencedirect.com/science/article/pii/S1769721222002579}, doi = {10.1016/j.ejmg.2022.104676}, abstract = {Diagnostic genomic sequencing generates unprecedented amounts of data. In addition to its primary use, this data could be used for a wide range of secondary purposes, including research and informing future healthcare for the data donor. These opportunities may require data to be shared with third parties. Although effective data sharing relies on public support, there are barriers which may prevent people from choosing to donate their genomic data and surprisingly few studies explore these barriers in depth. To address this need, this study aimed to qualitatively explore the Australian public's views and preferences for storing and sharing genomic data. 
Online focus groups were recorded, transcribed, and analysed using inductive content analysis. A total of 7 focus groups were conducted with 39 members of the Australian public ranging from 18 to 67 years of age. Participants were mostly supportive of genomic data being stored and shared for secondary purposes, recognising the potential benefits for individual health and wider medical research. However, some concerns were identified. Participants felt genomic data was particularly sensitive information, and raised the potential for discrimination, stigma, and other malicious uses of such data. Concerns for privacy and security of the data were also prevalent. Trustworthiness of data users was important when considering who genomic data should be shared with. Although participants were supportive of data being freely available to health professionals and researchers, they were opposed to insurance companies and employers accessing the data. There was greater controversy around sharing data with law enforcement and pharmaceutical companies. Participants recognised both benefits and harms to sharing with law enforcement. They were also cognizant of the dual purpose of pharmaceutical companies as both research and profit-driven organisations. Finally, participants expressed varying perspectives about sharing genomic data with family members, yet most agreed that explicit consent from the data donor should be required to share their information with relatives. This study highlighted several of the Australian public's perceived barriers and motivators for the storage and sharing of genomic data. Participants recognised both the benefits of collecting, storing and sharing such data widely but also the potential for harm from data misuse. While public acceptance of such endeavours is required to maximise the volume of data made available, the concerns around data access and security need to be addressed before this can occur. 
These findings also highlight the nuance and ethical complexity of decisions about who we should allow to access donated genomic data. These perspectives will be essential in helping to shape the way large-scale genomic data storage and sharing is developed and implemented in Australia, and internationally.}, language = {en}, number = {1}, urldate = {2023-02-14}, journal = {European Journal of Medical Genetics}, author = {Lynch, Fiona and Meng, Yan and Best, Stephanie and Goranitis, Ilias and Savulescu, Julian and Gyngell, Christopher and Vears, Danya F.}, month = jan, year = {2023}, keywords = {Bioethics, Data management, Genome, Genomic medicine, Public opinion, Qualitative research}, pages = {104676}, }

% GA4GH news item: the organisation is a corporate author, so it is wrapped
% in an extra pair of braces.
@misc{ga4gh_new_2022,
  type = {Article},
  title = {New release of {GA4GH} {Beacon} expands genomic and clinical data access},
  url = {https://www.ga4gh.org/news/new-release-of-ga4gh-beacon-expands-genomic-and-clinical-data-access/},
  urldate = {2023-02-14},
  author = {{GA4GH}},
  month = may,
  year = {2022},
  keywords = {Beacon, GA4GH},
}

% The abstract's three-item enumeration had lost its "(i)" marker in export.
@article{hernandez_applying_2022,
  title = {Applying {FHIR} {Genomics} for {Research} – {From} {Sequencing} to {Database}},
  volume = {2022},
  issn = {1942-597X},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9285172/},
  abstract = {The availability of next-generation sequencing (NGS) technologies and their continually declining costs have resulted in the accumulation of large genomic data sets. NGS results have traditionally been delivered in PDF format, and in some cases, structured data, e.g., XML or JSON formats, are also made available, but there is a lack of uniformity around the profiling of external vendor testing platforms. Atrium Health Wake Forest Baptist and TriNetX have harmonized and mapped genomic data to FHIR Genomic standards and imported it into the TriNetX database through a data pipeline. This process is translatable to other sequencing platforms and to other institutions. The addition of genotypic data to the TriNetX database to the reservoir of phenotypic data will promote (i) enhanced industry trial recruitment, (ii) comprehensive intra-institutional genomic benchmarking/quality improvement, and eventually (iii) sweeping inter-institutional genomic research and treatment paradigm shifts.},
  urldate = {2022-11-03},
  journal = {AMIA Annual Symposium Proceedings},
  author = {Hernandez, Sean and Fairchild, Karen and Pemberton, Mark and Dahmer, Jonathan and Zhang, Wei and Palchuk, Matvey B. and Topaloglu, Umit},
  month = may,
  year = {2022},
  pmid = {35854733},
  pmcid = {PMC9285172},
  pages = {236--243},
}

@article{wong_singapore_2023, title = {The {Singapore} {National} {Precision} {Medicine} {Strategy}}, copyright = {2023 Springer Nature America, Inc.}, issn = {1546-1718}, url = {https://www.nature.com/articles/s41588-022-01274-x}, doi = {10.1038/s41588-022-01274-x}, abstract = {Precision medicine promises to transform healthcare for groups and individuals through early disease detection, refining diagnoses and tailoring treatments. Analysis of large-scale genomic–phenotypic databases is a critical enabler of precision medicine. Although Asia is home to 60\% of the world’s population, many Asian ancestries are under-represented in existing databases, leading to missed opportunities for new discoveries, particularly for diseases most relevant for these populations. The Singapore National Precision Medicine initiative is a whole-of-government 10-year initiative aiming to generate precision medicine data of up to one million individuals, integrating genomic, lifestyle, health, social and environmental data. Beyond technologies, routine adoption of precision medicine in clinical practice requires social, ethical, legal and regulatory barriers to be addressed.
Identifying driver use cases in which precision medicine results in standardized changes to clinical workflows or improvements in population health, coupled with health economic analysis to demonstrate value-based healthcare, is a vital prerequisite for responsible health system adoption.}, language = {en}, urldate = {2023-02-07}, journal = {Nature Genetics}, author = {Wong, Eleanor and Bertin, Nicolas and Hebrard, Maxime and Tirado-Magallanes, Roberto and Bellis, Claire and Lim, Weng Khong and Chua, Chee Yong and Tong, Philomena Mei Lin and Chua, Raymond and Mak, Kenneth and Lim, Tit Meng and Cheong, Wei Yang and Thien, Kwee Eng and Goh, Khean Teik and Chai, Jin-Fang and Lee, Jimmy and Sung, Joseph Jao-Yiu and Wong, Tien Yin and Chin, Calvin Woon Loong and Gluckman, Peter D. and Goh, Liuh Ling and Ban, Kenneth Hon Kim and Tan, Tin Wee and Sim, Xueling and Cheng, Ching-Yu and Davila, Sonia and Karnani, Neerja and Leong, Khai Pang and Liu, Jianjun and Prabhakar, Shyam and Maurer-Stroh, Sebastian and Verma, Chandra Shekhar and Krishnaswamy, Pavitra and Goh, Rick Siow Mong and Chia, Irenaeus and Ho, Clarissa and Low, Doreen and Virabhak, Suchin and Yong, Jacklyn and Zheng, Weiling and Seow, Shih Wee and Seck, Yee Kwang and Koh, Mingshi and Chambers, John C. and Tai, E. 
Shyong and Tan, Patrick}, month = jan, year = {2023}, note = {Publisher: Nature Publishing Group}, keywords = {Population genetics, Preventive medicine}, pages = {1--9}, }

@article{paltiel_protection_2023,
  title = {Protection of genomic data and the {Australian} {Privacy} {Act}: when are genomic data ‘personal information’?},
  issn = {2044-3994},
  shorttitle = {Protection of genomic data and the {Australian} {Privacy} {Act}},
  url = {https://doi.org/10.1093/idpl/ipad002},
  doi = {10.1093/idpl/ipad002},
  urldate = {2023-02-07},
  journal = {International Data Privacy Law},
  author = {Paltiel, Minna and Taylor, Mark and Newson, Ainsley},
  month = feb,
  year = {2023},
  pages = {ipad002},
}

% NOTE(review): exported without author or year; presumably a Genomics England
% video showcase. Confirm and add an author if so.
@misc{noauthor_panelapp_nodate,
  title = {{PanelApp} on {Vimeo}},
  url = {https://vimeo.com/showcase/4754293},
  urldate = {2023-01-25},
}

% The issue number lives in the number field, so the note keeps only the
% publisher.
@article{martin_panelapp_2019,
  title = {{PanelApp} crowdsources expert knowledge to establish consensus diagnostic gene panels},
  volume = {51},
  copyright = {2019 Springer Nature America, Inc.},
  issn = {1546-1718},
  url = {https://www.nature.com/articles/s41588-019-0528-2},
  doi = {10.1038/s41588-019-0528-2},
  abstract = {A fundamental problem in rare-disease diagnostics is the lack of consensus as to which genes have sufficient evidence to attribute causation. To address this issue, we have created PanelApp (https://panelapp.genomicsengland.co.uk), a publicly available knowledge base of curated virtual gene panels.},
  language = {en},
  number = {11},
  urldate = {2023-01-25},
  journal = {Nature Genetics},
  author = {Martin, Antonio Rueda and Williams, Eleanor and Foulger, Rebecca E. and Leigh, Sarah and Daugherty, Louise C. and Niblock, Olivia and Leong, Ivone U. S. and Smith, Katherine R. and Gerasimenko, Oleg and Haraldsdottir, Eik and Thomas, Ellen and Scott, Richard H. and Baple, Emma and Tucci, Arianna and Brittain, Helen and de Burca, Anna and Ibañez, Kristina and Kasperaviciute, Dalia and Smedley, Damian and Caulfield, Mark and Rendon, Augusto and McDonagh, Ellen M.},
  month = nov,
  year = {2019},
  note = {Publisher: Nature Publishing Group},
  keywords = {Clinical genetics, Diseases, Genomic analysis, Genomics, Medical genetics},
  pages = {1560--1565},
}

@article{distefano_gene_2022,
  title = {The {Gene} {Curation} {Coalition}: {A} global effort to harmonize gene–disease evidence resources},
  volume = {24},
  issn = {1098-3600, 1530-0366},
  shorttitle = {The {Gene} {Curation} {Coalition}},
  url = {https://www.gimjournal.org/article/S1098-3600(22)00746-8/fulltext},
  doi = {10.1016/j.gim.2022.04.017},
  language = {en},
  number = {8},
  urldate = {2023-01-25},
  journal = {Genetics in Medicine},
  author = {DiStefano, Marina T. and Goehringer, Scott and Babb, Lawrence and Alkuraya, Fowzan S. and Amberger, Joanna and Amin, Mutaz and Austin-Tse, Christina and Balzotti, Marie and Berg, Jonathan S. and Birney, Ewan and Bocchini, Carol and Bruford, Elspeth A. and Coffey, Alison J. and Collins, Heather and Cunningham, Fiona and Daugherty, Louise C. and Einhorn, Yaron and Firth, Helen V. and Fitzpatrick, David R. and Foulger, Rebecca E. and Goldstein, Jennifer and Hamosh, Ada and Hurles, Matthew R. and Leigh, Sarah E. and Leong, Ivone U. S. and Maddirevula, Sateesh and Martin, Christa L. and McDonagh, Ellen M. and Olry, Annie and Puzriakova, Arina and Radtke, Kelly and Ramos, Erin M. and Rath, Ana and Riggs, Erin Rooney and Roberts, Angharad M. and Rodwell, Charlotte and Snow, Catherine and Stark, Zornitza and Tahiliani, Jackie and Tweedie, Susan and Ware, James S. and Weller, Phillip and Williams, Eleanor and Wright, Caroline F.
and Yates, Thabo Michael and Rehm, Heidi L.}, month = aug, year = {2022}, pmid = {35507016}, note = {Publisher: Elsevier}, keywords = {Database, GenCC, Gene curation, Genetic diagnosis, The Gene Curation Coalition}, pages = {1732--1742}, } @article{zurek_solve-rd_2021, title = {Solve-{RD}: systematic pan-{European} data sharing and collaborative analysis to solve rare diseases}, volume = {29}, copyright = {2021 The Author(s)}, issn = {1476-5438}, shorttitle = {Solve-{RD}}, url = {https://www.nature.com/articles/s41431-021-00859-0}, doi = {10.1038/s41431-021-00859-0}, abstract = {For the first time in Europe hundreds of rare disease (RD) experts team up to actively share and jointly analyse existing patient’s data. Solve-RD is a Horizon 2020-supported EU flagship project bringing together {\textgreater}300 clinicians, scientists, and patient representatives of 51 sites from 15 countries. Solve-RD is built upon a core group of four European Reference Networks (ERNs; ERN-ITHACA, ERN-RND, ERN-Euro NMD, ERN-GENTURIS) which annually see more than 270,000 RD patients with respective pathologies. The main ambition is to solve unsolved rare diseases for which a molecular cause is not yet known. This is achieved through an innovative clinical research environment that introduces novel ways to organise expertise and data. Two major approaches are being pursued (i) massive data re-analysis of {\textgreater}19,000 unsolved rare disease patients and (ii) novel combined -omics approaches. The minimum requirement to be eligible for the analysis activities is an inconclusive exome that can be shared with controlled access. The first preliminary data re-analysis has already diagnosed 255 cases form 8393 exomes/genome datasets. 
This unprecedented degree of collaboration focused on sharing of data and expertise shall identify many new disease genes and enable diagnosis of many so far undiagnosed patients from all over Europe.}, language = {en}, number = {9}, urldate = {2023-01-25}, journal = {European Journal of Human Genetics}, author = {Zurek, Birte and Ellwanger, Kornelia and Vissers, Lisenka E. L. M. and Schüle, Rebecca and Synofzik, Matthis and Töpf, Ana and de Voer, Richarda M. and Laurie, Steven and Matalonga, Leslie and Gilissen, Christian and Ossowski, Stephan and ’t Hoen, Peter A. C. and Vitobello, Antonio and Schulze-Hentrich, Julia M. and Riess, Olaf and Brunner, Han G. and Brookes, Anthony J. and Rath, Ana and Bonne, Gisèle and Gumus, Gulcin and Verloes, Alain and Hoogerbrugge, Nicoline and Evangelista, Teresinha and Harmuth, Tina and Swertz, Morris and Spalding, Dylan and Hoischen, Alexander and Beltran, Sergi and Graessner, Holm}, month = sep, year = {2021}, note = {Number: 9 Publisher: Nature Publishing Group}, keywords = {Diseases, Medical genetics}, pages = {1325--1331}, } @article{laurie_rd-connect_2022, title = {The {RD}-{Connect} {Genome}-{Phenome} {Analysis} {Platform}: {Accelerating} diagnosis, research, and gene discovery for rare diseases}, volume = {43}, issn = {1098-1004}, shorttitle = {The {RD}-{Connect} {Genome}-{Phenome} {Analysis} {Platform}}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/humu.24353}, doi = {10.1002/humu.24353}, abstract = {Rare disease patients are more likely to receive a rapid molecular diagnosis nowadays thanks to the wide adoption of next-generation sequencing. However, many cases remain undiagnosed even after exome or genome analysis, because the methods used missed the molecular cause in a known gene, or a novel causative gene could not be identified and/or confirmed. 
To address these challenges, the RD-Connect Genome-Phenome Analysis Platform (GPAP) facilitates the collation, discovery, sharing, and analysis of standardized genome-phenome data within a collaborative environment. Authorized clinicians and researchers submit pseudonymised phenotypic profiles encoded using the Human Phenotype Ontology, and raw genomic data which is processed through a standardized pipeline. After an optional embargo period, the data are shared with other platform users, with the objective that similar cases in the system and queries from peers may help diagnose the case. Additionally, the platform enables bidirectional discovery of similar cases in other databases from the Matchmaker Exchange network. To facilitate genome-phenome analysis and interpretation by clinical researchers, the RD-Connect GPAP provides a powerful user-friendly interface and leverages tens of information sources. As a result, the resource has already helped diagnose hundreds of rare disease patients and discover new disease causing genes.}, language = {en}, number = {6}, urldate = {2023-01-25}, journal = {Human Mutation}, author = {Laurie, Steven and Piscia, Davide and Matalonga, Leslie and Corvó, Alberto and Fernández-Callejo, Marcos and Garcia-Linares, Carles and Hernandez-Ferrer, Carles and Luengo, Cristina and Martínez, Inés and Papakonstantinou, Anastasios and Picó-Amador, Daniel and Protasio, Joan and Thompson, Rachel and Tonda, Raul and Bayés, Mònica and Bullich, Gemma and Camps-Puchadas, Jordi and Paramonov, Ida and Trotta, Jean-Rémi and Alonso, Angel and Attimonelli, Marcella and Béroud, Christophe and Bros-Facer, Virginie and Buske, Orion J. and Cañada-Pallarés, Andrés and Fernández, José M. and Hansson, Mats G. and Horvath, Rita and Jacobsen, Julius O.B. and Kaliyaperumal, Rajaram and Lair-Préterre, Séverine and Licata, Luana and Lopes, Pedro and López-Martín, Estrella and Mascalzoni, Deborah and Monaco, Lucia and Pérez-Jurado, Luis A. 
and Posada de la Paz, Manuel and Rambla, Jordi and Rath, Ana and Riess, Olaf and Robinson, Peter N. and Salgado, David and Smedley, Damian and Spalding, Dylan and 't Hoen, Peter A. C. and Töpf, Ana and Zaharieva, Irina and Graessner, Holm and Gut, Ivo G. and Lochmüller, Hanns and Beltran, Sergi}, year = {2022}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/humu.24353}, keywords = {NGS, data sharing, data standardization, diagnostics, genome analysis, patient matchmaking, rare diseases}, pages = {717--733}, } @article{philippakis_matchmaker_2015, title = {The {Matchmaker} {Exchange}: {A} {Platform} for {Rare} {Disease} {Gene} {Discovery}}, volume = {36}, issn = {1098-1004}, shorttitle = {The {Matchmaker} {Exchange}}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/humu.22858}, doi = {10.1002/humu.22858}, abstract = {There are few better examples of the need for data sharing than in the rare disease community, where patients, physicians, and researchers must search for “the needle in a haystack” to uncover rare, novel causes of disease within the genome. Impeding the pace of discovery has been the existence of many small siloed datasets within individual research or clinical laboratory databases and/or disease-specific organizations, hoping for serendipitous occasions when two distant investigators happen to learn they have a rare phenotype in common and can “match” these cases to build evidence for causality. However, serendipity has never proven to be a reliable or scalable approach in science. As such, the Matchmaker Exchange (MME) was launched to provide a robust and systematic approach to rare disease gene discovery through the creation of a federated network connecting databases of genotypes and rare phenotypes using a common application programming interface (API). The core building blocks of the MME have been defined and assembled. Three MME services have now been connected through the API and are available for community use. 
Additional databases that support internal matching are anticipated to join the MME network as it continues to grow.}, number = {10}, urldate = {2023-01-25}, journal = {Human Mutation}, author = {Philippakis, Anthony A. and Azzariti, Danielle R. and Beltran, Sergi and Brookes, Anthony J. and Brownstein, Catherine A. and Brudno, Michael and Brunner, Han G. and Buske, Orion J. and Carey, Knox and Doll, Cassie and Dumitriu, Sergiu and Dyke, Stephanie O.M. and den Dunnen, Johan T. and Firth, Helen V. and Gibbs, Richard A. and Girdea, Marta and Gonzalez, Michael and Haendel, Melissa A. and Hamosh, Ada and Holm, Ingrid A. and Huang, Lijia and Hurles, Matthew E. and Hutton, Ben and Krier, Joel B. and Misyura, Andriy and Mungall, Christopher J. and Paschall, Justin and Paten, Benedict and Robinson, Peter N. and Schiettecatte, François and Sobreira, Nara L. and Swaminathan, Ganesh J. and Taschner, Peter E. and Terry, Sharon F. and Washington, Nicole L. and Züchner, Stephan and Boycott, Kym M. and Rehm, Heidi L.}, year = {2015}, note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/humu.22858}, keywords = {GA4GH, IRDiRC, Matchmaker Exchange, gene discovery, genomic API, matchmaking, rare disease}, pages = {915--921}, } @article{alterovitz_fhir_2020, title = {{FHIR} {Genomics}: enabling standardization for precision medicine use cases}, volume = {5}, copyright = {2020 The Author(s)}, issn = {2056-7944}, shorttitle = {{FHIR} {Genomics}}, url = {https://www.nature.com/articles/s41525-020-0115-6}, doi = {10.1038/s41525-020-0115-6}, abstract = {The development of Fast Healthcare Interoperability Resources (FHIR) Genomics, a feasible and efficient method for exchanging complex clinical genomic data and interpretations, is described. FHIR Genomics is a subset of the emerging Health Level 7 FHIR standard and targets data from increasingly available technologies such as next-generation sequencing. 
Much care and integration of feedback have been taken to ease implementation, facilitate wide-scale interoperability, and enable modern app development toward a complete precision medicine standard. A new use case, the integration of the Variant Interpretation for Cancer Consortium (VICC) “meta-knowledgebase” into a third-party application, is described.},
  language = {en},
  number = {1},
  urldate = {2023-01-18},
  journal = {npj Genomic Medicine},
  author = {Alterovitz, Gil and Heale, Bret and Jones, James and Kreda, David and Lin, Fan and Liu, Lei and Liu, Xin and Mandl, Kenneth D. and Poloway, David W. and Ramoni, Rachel and Wagner, Alex and Warner, Jeremy L.},
  month = mar,
  year = {2020},
  publisher = {Nature Publishing Group},
  keywords = {Clinical genetics, Data processing, Genomics, Health policy, High-throughput screening},
  pages = {1--4},
}

@misc{noauthor_medical_nodate,
  type = {Page},
  title = {Medical {Genome} {Reference} {Bank}},
  howpublished = {Garvan Institute of Medical Research},
  url = {https://www.garvan.org.au/research/kinghorn-centre-for-clinical-genomics/research-programs/sydney-genomics-collaborative/mgrb},
  abstract = {Thousands of genome sequences from healthy, aged individuals will provide the ideal background for the future of genomic research.},
  language = {en},
  urldate = {2023-01-17},
}

@article{pinese_medical_2020,
  title = {The {Medical} {Genome} {Reference} {Bank} contains whole genome and phenotype data of 2570 healthy elderly},
  volume = {11},
  copyright = {2020 The Author(s)},
  issn = {2041-1723},
  url = {https://www.nature.com/articles/s41467-019-14079-0},
  doi = {10.1038/s41467-019-14079-0},
  abstract = {Population health research is increasingly focused on the genetic determinants of healthy ageing, but there is no public resource of whole genome sequences and phenotype data from healthy elderly individuals.
Here we describe the first release of the Medical Genome Reference Bank (MGRB), comprising whole genome sequence and phenotype of 2570 elderly Australians depleted for cancer, cardiovascular disease, and dementia. We analyse the MGRB for single-nucleotide, indel and structural variation in the nuclear and mitochondrial genomes. MGRB individuals have fewer disease-associated common and rare germline variants, relative to both cancer cases and the gnomAD and UK Biobank cohorts, consistent with risk depletion. Age-related somatic changes are correlated with grip strength in men, suggesting blood-derived whole genomes may also provide a biologic measure of age-related functional deterioration. The MGRB provides a broadly applicable reference cohort for clinical genetics and genomic association studies, and for understanding the genetics of healthy ageing.}, language = {en}, number = {1}, urldate = {2023-01-17}, journal = {Nature Communications}, author = {Pinese, Mark and Lacaze, Paul and Rath, Emma M. and Stone, Andrew and Brion, Marie-Jo and Ameur, Adam and Nagpal, Sini and Puttick, Clare and Husson, Shane and Degrave, Dmitry and Cristina, Tina Navin and Kahl, Vivian F. S. and Statham, Aaron L. and Woods, Robyn L. and McNeil, John J. and Riaz, Moeen and Barr, Margo and Nelson, Mark R. and Reid, Christopher M. and Murray, Anne M. and Shah, Raj C. and Wolfe, Rory and Atkins, Joshua R. and Fitzsimmons, Chantel and Cairns, Heath M. and Green, Melissa J. and Carr, Vaughan J. and Cowley, Mark J. and Pickett, Hilda A. and James, Paul A. and Powell, Joseph E. and Kaplan, Warren and Gibson, Greg and Gyllensten, Ulf and Cairns, Murray J. and McNamara, Martin and Dinger, Marcel E. 
and Thomas, David M.},
  month = jan,
  year = {2020},
  publisher = {Nature Publishing Group},
  keywords = {Ageing, Genetics research, Medical genomics, Rare variants},
  pages = {435},
}

@article{ma_iprox_2019,
  title = {{iProX}: an integrated proteome resource},
  volume = {47},
  issn = {0305-1048},
  shorttitle = {{iProX}},
  url = {https://doi.org/10.1093/nar/gky869},
  doi = {10.1093/nar/gky869},
  abstract = {Sharing of research data in public repositories has become best practice in academia. With the accumulation of massive data, network bandwidth and storage requirements are rapidly increasing. The ProteomeXchange (PX) consortium implements a mode of centralized metadata and distributed raw data management, which promotes effective data sharing. To facilitate open access of proteome data worldwide, we have developed the integrated proteome resource iProX (http://www.iprox.org) as a public platform for collecting and sharing raw data, analysis results and metadata obtained from proteomics experiments. The iProX repository employs a web-based proteome data submission process and open sharing of mass spectrometry-based proteomics datasets. Also, it deploys extensive controlled vocabularies and ontologies to annotate proteomics datasets. Users can use a GUI to provide and access data through a fast Aspera-based transfer tool. iProX is a full member of the PX consortium; all released datasets are freely accessible to the public. iProX is based on a high availability architecture and has been deployed as part of the proteomics infrastructure of China, ensuring long-term and stable resource support.
iProX will facilitate worldwide data analysis and sharing of proteomics experiments.},
  number = {D1},
  urldate = {2023-01-16},
  journal = {Nucleic Acids Research},
  author = {Ma, Jie and Chen, Tao and Wu, Songfeng and Yang, Chunyuan and Bai, Mingze and Shu, Kunxian and Li, Kenli and Zhang, Guoqing and Jin, Zhong and He, Fuchu and Hermjakob, Henning and Zhu, Yunping},
  month = jan,
  year = {2019},
  pages = {D1211--D1217},
}

@article{okuda_jpostrepo_2017,
  title = {{jPOSTrepo}: an international standard data repository for proteomes},
  volume = {45},
  issn = {0305-1048},
  shorttitle = {{jPOSTrepo}},
  url = {https://doi.org/10.1093/nar/gkw1080},
  doi = {10.1093/nar/gkw1080},
  abstract = {Major advancements have recently been made in mass spectrometry-based proteomics, yielding an increasing number of datasets from various proteomics projects worldwide. In order to facilitate the sharing and reuse of promising datasets, it is important to construct appropriate, high-quality public data repositories. jPOSTrepo (https://repository.jpostdb.org/) has successfully implemented several unique features, including high-speed file uploading, flexible file management and easy-to-use interfaces. This repository has been launched as a public repository containing various proteomic datasets and is available for researchers worldwide. In addition, our repository has joined the ProteomeXchange consortium, which includes the most popular public repositories such as PRIDE in Europe for MS/MS datasets and PASSEL for SRM datasets in the USA. Later MassIVE was introduced in the USA and accepted into the ProteomeXchange, as was our repository in July 2016, providing important datasets from Asia/Oceania. Accordingly, this repository thus contributes to a global alliance to share and store all datasets from a wide variety of proteomics experiments.
Thus, the repository is expected to become a major repository, particularly for data collected in the Asia/Oceania region.},
  number = {D1},
  urldate = {2023-01-16},
  journal = {Nucleic Acids Research},
  author = {Okuda, Shujiro and Watanabe, Yu and Moriya, Yuki and Kawano, Shin and Yamamoto, Tadashi and Matsumoto, Masaki and Takami, Tomoyo and Kobayashi, Daiki and Araki, Norie and Yoshizawa, Akiyasu C. and Tabata, Tsuyoshi and Sugiyama, Naoyuki and Goto, Susumu and Ishihama, Yasushi},
  month = jan,
  year = {2017},
  pages = {D1107--D1111},
}

@misc{iudin_empiar_2022,
  title = {{EMPIAR}: {The} {Electron} {Microscopy} {Public} {Image} {Archive}},
  copyright = {© 2022, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/},
  shorttitle = {{EMPIAR}},
  url = {https://www.biorxiv.org/content/10.1101/2022.10.04.510785v1},
  doi = {10.1101/2022.10.04.510785},
  abstract = {Public archiving in structural biology is well established with the Protein Data Bank (PDB; wwPDB.org) catering for atomic models and the Electron Microscopy Data Bank (EMDB; emdb-empiar.org) for 3D reconstructions from cryo-EM experiments. Even before the recent rapid growth in cryo-EM, there was an expressed community need for a public archive of image data from cryo-EM experiments for validation, software development, testing and training. Concomitantly, the proliferation of 3D imaging techniques for cells, tissues and organisms using volume EM (vEM) and X-ray tomography (XT) led to calls from these communities to publicly archive such data as well. EMPIAR (empiar.org) was developed as a public archive for raw cryo-EM image data and for 3D reconstructions from vEM and XT experiments and now comprises over a thousand entries totalling over 2 petabytes of data.
EMPIAR resources include a deposition system, entry pages, facilities to search, visualise and download datasets, and a REST API for programmatic access to entry metadata. The success of EMPIAR also poses significant challenges for the future in dealing with the very fast growth in the volume of data and in enhancing its reusability.},
  language = {en},
  urldate = {2023-01-16},
  publisher = {bioRxiv},
  author = {Iudin, Andrii and Korir, Paul K. and Somasundharam, Sriram and Weyand, Simone and Cattavitello, Cesare and Fonseca, Neli and Salih, Osman and Kleywegt, Gerard J. and Patwardhan, Ardan},
  month = oct,
  year = {2022},
  eprint = {2022.10.04.510785},
  eprinttype = {bioRxiv},
}

@article{malakar_balancing_2023,
  title = {Balancing the safeguarding of privacy and data sharing: perceptions of genomic professionals on patient genomic data ownership in {Australia}},
  copyright = {2023 The Author(s)},
  issn = {1476-5438},
  shorttitle = {Balancing the safeguarding of privacy and data sharing},
  url = {https://www.nature.com/articles/s41431-022-01273-w},
  doi = {10.1038/s41431-022-01273-w},
  abstract = {There are inherent complexities and tensions in achieving a responsible balance between safeguarding patients’ privacy and sharing genomic data for advancing health and medical science. A growing body of literature suggests establishing patient genomic data ownership, enabled by blockchain technology, as one approach for managing these priorities. We conducted an online survey, applying a mixed methods approach to collect quantitative (using scale questions) and qualitative data (using open-ended questions). We explored the views of 117 genomic professionals (clinical geneticists, genetic counsellors, bioinformaticians, and researchers) towards patient data ownership in Australia. Data analysis revealed most professionals agreed that patients have rights to data ownership.
However, there is a need for a clearer understanding of the nature and implications of data ownership in this context as genomic data often is subject to collective ownership (e.g., with family members and laboratories). This research finds that while the majority of genomic professionals acknowledge the desire for patient data ownership, bioinformaticians and researchers expressed more favourable views than clinical geneticists and genetic counsellors, suggesting that their views on this issue may be shaped by how closely they interact with patients as part of their professional duties. This research also confirms that stronger health system infrastructure is a prerequisite for enabling patient data ownership, which needs to be underpinned by appropriate digital infrastructure (e.g., central vs. decentralised data storage), patient identity ownership (e.g., limited vs. self-sovereign identity), and policy at both federal and state levels.},
  language = {en},
  urldate = {2023-01-13},
  journal = {European Journal of Human Genetics},
  author = {Malakar, Yuwan and Lacey, Justine and Twine, Natalie A. and McCrea, Rod and Bauer, Denis C.},
  month = jan,
  year = {2023},
  publisher = {Nature Publishing Group},
  keywords = {Ethics, Health policy},
  pages = {1--7},
}

@misc{shih_raptor_2022,
  title = {{RAPTOR}: {A} {Five}-{Safes} approach to a secure, cloud native and serverless genomics data repository},
  copyright = {© 2022, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/},
  shorttitle = {{RAPTOR}},
  url = {https://www.biorxiv.org/content/10.1101/2022.10.27.514127v3},
  doi = {10.1101/2022.10.27.514127},
  abstract = {Genomic researchers are increasingly utilizing commercial cloud platforms (CCPs) to manage their data and analytics needs.
Commercial clouds allow researchers to grow their storage and analytics capacity on demand, keeping pace with expanding project data footprints and enabling researchers to avoid large capital expenditures while paying only for IT capacity consumed by their project. Cloud computing also allows researchers to overcome common network and storage bottlenecks encountered when combining or re-analysing large datasets. However, cloud computing presents a new set of challenges. Without adequate security controls, the risk of unauthorised access may be higher for data stored on the cloud. In addition, regulators are increasingly mandating data access patterns and specific security protocols on the storage and use of genomic data to safeguard rights of the study participants. While CCPs provide tools for security and regulatory compliance, utilising these tools to build the necessary controls required for cloud solutions is not trivial as such skill sets are not commonly found in a genomics lab. The Research Assets Provisioning and Tracking Online Repository (RAPTOR) by the Genome Institute of Singapore is a cloud native genomics data repository and analytics platform focusing on security and regulatory compliance. Using a “five-safes” framework (Safe Purpose, Safe People, Safe Settings, Safe Data and Safe Output), RAPTOR provides security and governance controls to data contributors and users leveraging cloud computing for sharing and analysis of large genomic datasets without the risk of security breaches or running afoul of regulations. 
RAPTOR can also enable data federation with other genomic data repositories using GA4GH community-defined standards, allowing researchers to boost the statistical power of their work and overcome geographic and ancestry limitations of data sets},
  language = {en},
  urldate = {2022-12-21},
  publisher = {bioRxiv},
  author = {Shih, Chih Chuan and Chen, Jieqi and Lee, Ai Shan and Bertin, Nicolas and Hebrard, Maxime and Khor, Chiea Chuen and Li, Zheng and Tan, Joanna Hui Juan and Meah, Wee Yang and Peh, Su Qin and Mok, Shi Qi and Sim, Kar Seng and Liu, Jianjun and Wang, Ling and Wong, Eleanor and Li, Jingmei and Tin, Aung and Cheng, Ching-Yu and Heng, Chew-Kiat and Yuan, Jian-Min and Koh, Woon-Puay and Saw, Seang Mei and Friedlander, Yechiel and Sim, Xueling and Chai, Jin Fang and Chong, Yap Seng and Davila, Sonia and Goh, Liuh Ling and Lee, Eng Sing and Wong, Tien Yin and Karnani, Neerja and Leong, Khai Pang and Yeo, Khung Keong and Chambers, John C. and Lim, Su Chi and Goh, Rick Siow Mong and Tan, Patrick and Dorajoo, Rajkumar},
  month = nov,
  year = {2022},
  eprint = {2022.10.27.514127},
  eprinttype = {bioRxiv},
}

@article{reich_evaluation_2012,
  series = {Translating {Standards} into {Practice}: {Experiences} and {Lessons} {Learned} in {Biomedicine} and {Health} {Care}},
  title = {Evaluation of alternative standardized terminologies for medical conditions within a network of observational healthcare databases},
  volume = {45},
  issn = {1532-0464},
  url = {https://www.sciencedirect.com/science/article/pii/S153204641200069X},
  doi = {10.1016/j.jbi.2012.05.002},
  abstract = {Large electronic databases of health care information, such as administrative claims and electronic health records, are available and are being used in a number of public health settings, including drug safety surveillance. However, because of a lack of standardization, clinical terminologies may differ across databases.
With the aid of existing resources and expert coders, we have developed mapping tables to convert ICD-9-CM diagnosis codes used in some existing databases to SNOMED-CT and MedDRA. In addition, previously developed definitions for specific health outcomes of interest were mapped to the same standardized vocabularies. We evaluated how vocabulary mapping affected (1) the retention of clinical data from two test databases, (2) the semantic space of outcome definitions, (3) the prevalence of each outcome in the test databases, and (4) the reliability of analytic methods designed to detect drug-outcome associations in the test databases. Although vocabulary mapping affected the semantic space of some outcome definitions, as well as the prevalence of some outcomes in the test databases, it had only minor effects on the analysis of drug-outcome associations. Furthermore, both SNOMED-CT and MedDRA were viable for use as standardized vocabularies in systems designed to perform active medical product surveillance using disparate sources of observational data.},
  language = {en},
  number = {4},
  urldate = {2022-12-20},
  journal = {Journal of Biomedical Informatics},
  author = {Reich, Christian and Ryan, Patrick B. and Stang, Paul E. and Rocca, Mitra},
  month = aug,
  year = {2012},
  keywords = {Coding scheme, ICD-9-CM, Mapping, Taxonomy, Terminology standards, Vocabulary},
  pages = {689--696},
}

@article{cncb-ngdc_members_and_partners_database_2022,
  title = {Database {Resources} of the {National} {Genomics} {Data} {Center}, {China} {National} {Center} for {Bioinformation} in 2023},
  issn = {0305-1048},
  url = {https://doi.org/10.1093/nar/gkac1073},
  doi = {10.1093/nar/gkac1073},
  abstract = {The National Genomics Data Center (NGDC), part of the China National Center for Bioinformation (CNCB), provides a family of database resources to support global academic and industrial communities.
With the explosive accumulation of multi-omics data generated at an unprecedented rate, CNCB-NGDC constantly expands and updates core database resources by big data archive, integrative analysis and value-added curation. In the past year, efforts have been devoted to integrating multiple omics data, synthesizing the growing knowledge, developing new resources and upgrading a set of major resources. Particularly, several database resources are newly developed for infectious diseases and microbiology (MPoxVR, KGCoV, ProPan), cancer-trait association (ASCancer Atlas, TWAS Atlas, Brain Catalog, CCAS) as well as tropical plants (TCOD). Importantly, given the global health threat caused by monkeypox virus and SARS-CoV-2, CNCB-NGDC has newly constructed the monkeypox virus resource, along with frequent updates of SARS-CoV-2 genome sequences, variants as well as haplotypes. All the resources and services are publicly accessible at https://ngdc.cncb.ac.cn.},
  urldate = {2022-12-15},
  journal = {Nucleic Acids Research},
  author = {{CNCB-NGDC Members and Partners}},
  month = nov,
  year = {2022},
  pages = {gkac1073},
}

@article{wirth_privacy-preserving_2021,
  title = {Privacy-preserving data sharing infrastructures for medical research: systematization and comparison},
  volume = {21},
  issn = {1472-6947},
  shorttitle = {Privacy-preserving data sharing infrastructures for medical research},
  url = {https://doi.org/10.1186/s12911-021-01602-x},
  doi = {10.1186/s12911-021-01602-x},
  abstract = {Data sharing is considered a crucial part of modern medical research. Unfortunately, despite its advantages, it often faces obstacles, especially data privacy challenges. As a result, various approaches and infrastructures have been developed that aim to ensure that patients and research participants remain anonymous when data is shared. However, privacy protection typically comes at a cost, e.g. restrictions regarding the types of analyses that can be performed on shared data.
What is lacking is a systematization making the trade-offs taken by different approaches transparent. The aim of the work described in this paper was to develop a systematization for the degree of privacy protection provided and the trade-offs taken by different data sharing methods. Based on this contribution, we categorized popular data sharing approaches and identified research gaps by analyzing combinations of promising properties and features that are not yet supported by existing approaches.},
  number = {1},
  urldate = {2022-12-08},
  journal = {BMC Medical Informatics and Decision Making},
  author = {Wirth, Felix Nikolaus and Meurers, Thierry and Johns, Marco and Prasser, Fabian},
  month = aug,
  year = {2021},
  keywords = {Biomedical data sharing, Data enclave, Distributed computing, Privacy, Secure multi-party computing, Systematization, Usefulness},
  pages = {242},
}

@article{garza_evaluating_2016,
  title = {Evaluating common data models for use with a longitudinal community registry},
  volume = {64},
  issn = {1532-0464},
  url = {https://www.sciencedirect.com/science/article/pii/S1532046416301538},
  doi = {10.1016/j.jbi.2016.10.016},
  abstract = {Objective To evaluate common data models (CDMs) to determine which is best suited for sharing data from a large, longitudinal, electronic health record (EHR)-based community registry. Materials and methods Four CDMs were chosen from models in use for clinical research data: Sentinel v5.0 (referred to as the Mini-Sentinel CDM in previous versions), PCORnet v3.0 (an extension of the Mini-Sentinel CDM), OMOP v5.0, and CDISC SDTM v1.4. Each model was evaluated against 11 criteria adapted from previous research. The criteria fell into six categories: content coverage, integrity, flexibility, ease of querying, standards compatibility, and ease and extent of implementation.
Results The OMOP CDM accommodated the highest percentage of our data elements (76\%), fared well on other requirements, and had broader terminology coverage than the other models. Sentinel and PCORnet fell short in content coverage with 37\% and 48\% matches respectively. Although SDTM accommodated a significant percentage of data elements (55\% true matches), 45\% of the data elements mapped to SDTM’s extension mechanism, known as Supplemental Qualifiers, increasing the number of joins required to query the data. Conclusion The OMOP CDM best met the criteria for supporting data sharing from longitudinal EHR-based studies. Conclusions may differ for other uses and associated data element sets, but the methodology reported here is easily adaptable to common data model evaluation for other uses.},
  language = {en},
  urldate = {2022-12-08},
  journal = {Journal of Biomedical Informatics},
  author = {Garza, Maryam and Del Fiol, Guilherme and Tenenbaum, Jessica and Walden, Anita and Zozus, Meredith Nahm},
  month = dec,
  year = {2016},
  keywords = {Common data model, Data model evaluation, Electronic health records},
  pages = {333--341},
}

@article{mcwhirter_holding_2022,
  title = {Holding {Human} {Research} {Ethics} {Committees} to {Account}: {A} {Role} for {Judicial} {Review}?},
  shorttitle = {Holding {Human} {Research} {Ethics} {Committees} to {Account}},
  url = {http://www8.austlii.edu.au/cgi-bin/viewdoc/au/journals/AdelLawRw/2022/14.html},
  urldate = {2022-12-06},
  journal = {Adelaide Law Review},
  author = {McWhirter, Rebekah},
  year = {2022},
}

@misc{noauthor_sg10k_health_nodate,
  title = {{SG10K}\_Health},
  url = {https://npm.a-star.edu.sg/},
  language = {en-US},
  urldate = {2022-11-23},
}

@article{wu_large-scale_2019,
  title = {Large-{Scale} {Whole}-{Genome} {Sequencing} of {Three} {Diverse} {Asian} {Populations} in {Singapore}},
  volume = {179},
  issn = {0092-8674},
  url = {https://www.sciencedirect.com/science/article/pii/S0092867419310700},
  doi = {10.1016/j.cell.2019.09.019},
abstract = {Underrepresentation of Asian genomes has hindered population and medical genetics research on Asians, leading to population disparities in precision medicine. By whole-genome sequencing of 4,810 Singapore Chinese, Malays, and Indians, we found 98.3 million SNPs and small insertions or deletions, over half of which are novel. Population structure analysis demonstrated great representation of Asian genetic diversity by three ethnicities in Singapore and revealed a Malay-related novel ancestry component. Furthermore, demographic inference suggested that Malays split from Chinese ∼24,800 years ago and experienced significant admixture with East Asians ∼1,700 years ago, coinciding with the Austronesian expansion. Additionally, we identified 20 candidate loci for natural selection, 14 of which harbored robust associations with complex traits and diseases. Finally, we show that our data can substantially improve genotype imputation in diverse Asian and Oceanian populations. These results highlight the value of our data as a resource to empower human genetics discovery across broad geographic regions.}, language = {en}, number = {3}, urldate = {2022-11-23}, journal = {Cell}, author = {Wu, Degang and Dou, Jinzhuang and Chai, Xiaoran and Bellis, Claire and Wilm, Andreas and Shih, Chih Chuan and Soon, Wendy Wei Jia and Bertin, Nicolas and Lin, Clarabelle Bitong and Khor, Chiea Chuen and DeGiorgio, Michael and Cheng, Shanshan and Bao, Li and Karnani, Neerja and Hwang, William Ying Khee and Davila, Sonia and Tan, Patrick and Shabbir, Asim and Moh, Angela and Tan, Eng-King and Foo, Jia Nee and Goh, Liuh Ling and Leong, Khai Pang and Foo, Roger S. Y. and Lam, Carolyn Su Ping and Richards, Arthur Mark and Cheng, Ching-Yu and Aung, Tin and Wong, Tien Yin and Ng, Huck Hui and Ackers-Johnson, Matthew Andrew and Aliwarga, Edita and Ban, Kenneth Hon Kim and Bertrand, Denis and Chambers, John C. 
and Chan, Dana Leng Hui and Chan, Cheryl Xue Li and Chee, Miao Li and Chee, Miao Ling and Chen, Pauline and Chen, Yunxin and Chew, Elaine Guo Yan and Chew, Wen Jie and Chiam, Lynn Hui Yun and Chong, Jenny Pek Ching and Chua, Ivan and Cook, Stuart A. and Dai, Wei and Dorajoo, Rajkumar and Foo, Chuan-Sheng and Goh, Rick Siow Mong and Hillmer, Axel M. and Irwan, Ishak D. and Jaufeerally, Fazlur and Javed, Asif and Jeyakani, Justin and Koh, John Tat Hung and Koh, Jia Yu and Krishnaswamy, Pavitra and Kuan, Jyn Ling and Kumari, Neelam and Lee, Ai Shan and Lee, Seow Eng and Lee, Sheldon and Lee, Yen Ling and Leong, See Ting and Li, Zheng and Li, Peter Yiqing and Liew, Jun Xian and Liew, Oi Wah and Lim, Su Chi and Lim, Weng Khong and Lim, Chia Wei and Lim, Tingsen Benson and Lim, Choon Kiat and Loh, Seet Yoong and Lok, Au Wing and Chin, Calvin W. L. and Majithia, Shivani and Maurer-Stroh, Sebastian and Meah, Wee Yang and Mok, Shi Qi and Nargarajan, Niranjan and Ng, Pauline and Ng, Sarah B. and Ng, Zhenyuan and Ng, Jessica Yan Xia and Ng, Ebonne and Ng, Shi Ling and Nusinovici, Simon and Ong, Chin Thing and Pan, Bangfen and Pedergnana, Vincent and Poh, Stanley and Prabhakar, Shyam and Prakash, Kumar M. and Quek, Ivy and Sabanayagam, Charumathi and See, Wei Qiang and Sia, Yee Yen and Sim, Xueling and Sim, Wey Cheng and So, Jimmy and Soon, Dinna K. N. and Tai, E. Shyong and Tan, Nicholas Y. and Tan, Louis C. S. and Tan, Hong Chang and Tan, Wilson Lek Wen and Tandiono, Moses and Tay, Amanda and Thakur, Sahil and Tham, Yih Chung and Tiang, Zenia and Toh, Grace Li-Xian and Tsai, Pi Kuang and Veeravalli, Lavanya and Verma, Chandra S. 
and Wang, Ling and Wang, Min Rui and Wong, Wing-Cheong and Xie, Zhicheng and Yeo, Khung Keong and Zhang, Liang and Zhai, Weiwei and Zhao, Yi and Liu, Jianjun and Wang, Chaolong},
  month = oct,
  year = {2019},
  keywords = {Asian populations, GWAS, admixture, demographic history, genetic diversity, imputation, natural selection, population structure, precision medicine, whole-genome sequencing},
  pages = {736--749.e15},
}

@article{harrow_elixir_2021,
  title = {{ELIXIR}: {Providing} a {Sustainable} {Infrastructure} for {Life} {Science} {Data} at {European} {Scale}},
  copyright = {cc by},
  issn = {1367-4811},
  shorttitle = {{ELIXIR}},
  url = {https://europepmc.org/articles/PMC8388016},
  doi = {10.1093/bioinformatics/btab481},
  language = {eng},
  urldate = {2022-11-22},
  journal = {Bioinformatics},
  author = {Harrow, Jennifer and Drysdale, Rachel and Smith, Andrew and Repo, Susanna and Lanfear, Jerry and Blomberg, Niklas},
  month = jun,
  year = {2021},
  pmid = {34175941},
  pmcid = {PMC8388016},
  pages = {btab481},
}

@article{tudini_shariant_2022,
  title = {Shariant platform: {Enabling} evidence sharing across {Australian} clinical genetic-testing laboratories to support variant interpretation},
  volume = {109},
  issn = {0002-9297, 1537-6605},
  shorttitle = {Shariant platform},
  url = {https://www.cell.com/ajhg/abstract/S0002-9297(22)00454-2},
  doi = {10.1016/j.ajhg.2022.10.006},
  language = {English},
  number = {11},
  urldate = {2022-11-22},
  journal = {The American Journal of Human Genetics},
  author = {Tudini, Emma and Andrews, James and Lawrence, David M. and King-Smith, Sarah L. and Baker, Naomi and Baxter, Leanne and Beilby, John and Bennetts, Bruce and Beshay, Victoria and Black, Michael and Boughtwood, Tiffany F. and Brion, Kristian and Cheong, Pak Leng and Christie, Michael and Christodoulou, John and Chong, Belinda and Cox, Kathy and Davis, Mark R. and Dejong, Lucas and Dinger, Marcel E. and Doig, Kenneth D.
and Douglas, Evelyn and Dubowsky, Andrew and Ellul, Melissa and Fellowes, Andrew and Fisk, Katrina and Fortuno, Cristina and Friend, Kathryn and Gallagher, Renee L. and Gao, Song and Hackett, Emma and Hadler, Johanna and Hipwell, Michael and Ho, Gladys and Hollway, Georgina and Hooper, Amanda J. and Kassahn, Karin S. and Krishnaraj, Rahul and Lau, Chiyan and Le, Huong and Leong, Huei San and Lundie, Ben and Lunke, Sebastian and Marty, Anthony and McPhillips, Mary and Nguyen, Lan T. and Nones, Katia and Palmer, Kristen and Pearson, John V. and Quinn, Michael C. J. and Rawlings, Lesley H. and Sadedin, Simon and Sanchez, Louisa and Schreiber, Andreas W. and Sigalas, Emanouil and Simsek, Aygul and Soubrier, Julien and Stark, Zornitza and Thompson, Bryony A. and U, James and Vakulin, Cassandra G. and Wells, Amanda V. and Wise, Cheryl A. and Woods, Rick and Ziolkowski, Andrew and Brion, Marie-Jo and Scott, Hamish S. and Thorne, Natalie P. and Spurdle, Amanda B.},
  month = nov,
  year = {2022},
  pmid = {36332611},
  publisher = {Elsevier},
  pages = {1960--1973},
}

@article{vasilevsky_mondo_2022, title = {Mondo: {Unifying} diseases for the world, by the world}, url = {https://www.medrxiv.org/content/early/2022/05/03/2022.04.13.22273750}, doi = {10.1101/2022.04.13.22273750}, abstract = {There are thousands of distinct disease entities and concepts, each of which are known by different and sometimes contradictory names. The Monarch Initiative aims to integrate genotype, phenotype, and disease knowledge from a large variety of sources in support of improved diagnostics and mechanism discovery through various algorithms and tools. However, the lack of a unified system for managing disease entities poses a major challenge for both machines and humans to predict causes and treatments for disease. The multitude of disease resources have not been well coordinated nor computationally integrated.
Furthermore, the classification of phenotypes and their association with diseases is another source of disagreement across sources. The Human Phenotype Ontology has helped to standardize phenotypic features across knowledge sources, but there was no equivalent computationally-harmonized disease ontology. To address these problems, a community of disease resources worked together to create the Mondo Disease Ontology as an open, community-driven ontology that integrates key medical and biomedical terminologies and is iteratively and regularly updated via manual curation and through synchronization with external sources using a Bayesian algorithm. Mondo supports disease data integration to improve diagnosis, treatment, and translational research. It records the sources of all data and is continually updated, making it suitable for research and clinical applications that require up-to-date disease knowledge.Evidence before this study Many disease terminologies currently exist, but there is not a definitive standard for encoding diseases while addressing requirements for information exchange. Existing sources of disease definitions include the National Cancer Institute Thesaurus (NCIt), the Online Mendelian Inheritance in Man (OMIM), Orphanet, SNOMED CT, Disease Ontology (DO), ICD-10, MedGen, and numerous others. Each of these is designed for a particular purpose, and as such has different strengths. However, these standards only partially overlap and often conflict in the classification or mapping approach, making it difficult to align them with each other and/or with other knowledge sources. 
This need to integrate information has resulted in a proliferation of mappings between disease entries in different resources; these mappings lack completeness, accuracy, and precision, and are often inconsistent between resources.Added value of this study In order to computationally leverage the available knowledge sources for diagnostics and to reveal underlying mechanisms of diseases, we need to understand which terms are meaningfully equivalent across different resources. This will allow integration of associated information, such as treatments, genetics, phenotypes, etc. We therefore created the Mondo Disease Ontology to provide a logic-based structure for unifying multiple disease resources.Implications of all the available evidence Mondo can be leveraged by researchers and clinicians for disease annotations and data integration to aid in clinical diagnosis, treatment and advancement of human health care. Mondo is a freely available, open terminology that contains over 20,000 disease classes. Mondo is iteratively developed with contributions from the intended community and is under continuous revision, with future plans to further revise the top-level classes. Recently, efforts to classify rare diseases have centered on retrieving terms from various sources to provide a unified resource. Mondo can be explored using any of a variety of ontology browsers such as the Ontology Lookup Service (OLS) (ebi.ac.uk/ols/ontologies/mondo), and the ontology files and current releases are available on GitHub (github.com/monarch-initiative/mondo).Competing Interest StatementThe authors have declared no competing interest.Funding StatementMondo is generously supported by the NIH National Human Genome Research Institute Phenomics First Resource, NIH-NHGRI ⋕ 1 RM1 HG010860-01, a Center of Excellence in Genomic Science; and an NIH Office of the Director Grant ⋕5R24OD011883 for the Monarch Initiative. 
Additional support for this research/work was supported in part by the National Center for Biotechnology Information of the National Library of Medicine (NLM), National Institutes of Health.Author DeclarationsI confirm all relevant ethical guidelines have been followed, and any necessary IRB and/or ethics committee approvals have been obtained.YesI confirm that all necessary patient/participant consent has been obtained and the appropriate institutional forms have been archived, and that any patient/participant/sample identifiers included were not known to anyone (e.g., hospital staff, patients or participants themselves) outside the research group so cannot be used to identify individuals.YesI understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance).YesI have followed all appropriate research reporting guidelines and uploaded the relevant EQUATOR Network research reporting checklist(s) and other pertinent material as supplementary files, if applicable.YesAll data produced are available online at https://github.com/monarch-initiative/mondo. 
https://github.com/monarch-initiative/mondo}, journal = {medRxiv}, author = {Vasilevsky, Nicole A and Matentzoglu, Nicolas A and Toro, Sabrina and Flack, Joseph E and Hegde, Harshad and Unni, Deepak R and Alyea, Gioconda F and Amberger, Joanna S and Babb, Larry and Balhoff, James P and Bingaman, Taylor I and Burns, Gully A and Buske, Orion J and Callahan, Tiffany J and Carmody, Leigh C and Cordo, Paula Carrio and Chan, Lauren E and Chang, George S and Christiaens, Sean L and Daugherty, Louise C and Dumontier, Michel and Failla, Laura E and Flowers, May J and Garrett, H. Alpha and Goldstein, Jennifer L and Gration, Dylan and Groza, Tudor and Hanauer, Marc and Harris, Nomi L and Hilton, Jason A and Himmelstein, Daniel S and Hoyt, Charles Tapley and Kane, Megan S and Köhler, Sebastian and Lagorce, David and Lai, Abbe and Larralde, Martin and Lock, Antonia and Santiago, Irene López and Maglott, Donna R and Malheiro, Adriana J and Meldal, Birgit H M and Munoz-Torres, Monica C and Nelson, Tristan H and Nicholas, Frank W and Ochoa, David and Olson, Daniel P and Oprea, Tudor I and Osumi-Sutherland, David and Parkinson, Helen and Pendlington, Zoë May and Rath, Ana and Rehm, Heidi L and Remennik, Lyubov and Riggs, Erin R and Roncaglia, Paola and Ross, Justyne E and Shadbolt, Marion F and Shefchek, Kent A and Similuk, Morgan N and Sioutos, Nicholas and Smedley, Damian and Sparks, Rachel and Stefancsik, Ray and Stephan, Ralf and Storm, Andrea L and Stupp, Doron and Stupp, Gregory S and Sundaramurthi, Jagadish Chandrabose and Tammen, Imke and Tay, Darin and Thaxton, Courtney L and Valasek, Eloise and Valls-Margarit, Jordi and Wagner, Alex H and Welter, Danielle and Whetzel, Patricia L and Whiteman, Lori L and Wood, Valerie and Xu, Colleen H and Zankl, Andreas and Zhang, Xingmin Aaron and Chute, Christopher G and Robinson, Peter N and Mungall, Christopher J and Hamosh, Ada and Haendel, Melissa A}, year = {2022}, note = {Publisher: Cold Spring Harbor Laboratory Press \_eprint: 
https://www.medrxiv.org/content/early/2022/05/03/2022.04.13.22273750.full.pdf}, }

% F1000Research poster. The shorttitle no longer starts with the HTML "<p>" tag
% residue that the export had glued onto it.
@article{shadbolt_national_2022,
  title = {National and international collaboration to facilitate human genomics data sharing in {Australia}: {The} {Human} {Genomes} {Platform} {Project}},
  volume = {11},
  shorttitle = {National and international collaboration to facilitate human genomics data sharing in {Australia}},
  url = {https://f1000research.com/posters/11-624},
  doi = {10.7490/f1000research.1118989.1},
  abstract = {Read this work by Shadbolt M, at F1000Research.},
  urldate = {2022-11-20},
  journal = {F1000Research},
  author = {Shadbolt, Marion and Boughtwood, Tiffany and Christiansen, Jeff and Copty, Joe and Cowley, Mark and Davies, Kylie and Downton, Matthew and Druken, Kelsey and Evans, Ben and Gaff, Clara and Gilbert, Andrew and Hall, Christina and Hofmann, Oliver and Holliday, Jessica and Kaplan, Warren and Koufariotis, Ross and Kummerfeld, Sarah and Leonard, Conrad and Lin, Angela and Lonie, Andrew and Marks, Heath and McCafferty, Siobhann and Monro, David and Patterson, Andrew and Pearson, John and Pope, Bernard and Ravishankar, Shyamsundar and Reisinger, Florian and Robinson, Andrew and Lin, Victor San Kho and Scullen, John and Syed, Mustafa and Taouk, Kamile and Treloar, Andrew and Wang, Jingbo and Wong-Erasmus, Marie and Wood, Scott},
  month = jun,
  year = {2022},
}

% Legislation record for the Privacy Act 1988; the register ID from the URL
% (C2022C00199) is what the export stored in the abstract field.
@misc{ag_privacy_2022,
  title = {Privacy {Act} 1988},
  copyright = {https://www.legislation.gov.au/Content/Disclaimer},
  url = {http://www.legislation.gov.au/Details/C2022C00199},
  abstract = {C2022C00199},
  language = {en},
  urldate = {2022-11-07},
  author = {AG},
  year = {2022},
  note = {Archive Location: au Publisher: Attorney-General's Department},
}

@misc{oaic_privacy_2022, title = {The {Privacy} {Act} informational website}, url = {https://www.oaic.gov.au/privacy/the-privacy-act}, abstract = {The Privacy Act 1988 was introduced to promote and protect the privacy of individuals and to regulate
how Australian Government agencies and organisations with an annual turnover of more than \$3 million, and some other organisations, handle personal information.}, language = {en}, urldate = {2022-11-07}, journal = {Home}, author = {OAIC}, year = {2022}, }

% Web pages and online documentation (GA4GH, FAIRsharing, FHIR, OHDSI).
% Titles are kept exactly as exported, including site-name suffixes.

@misc{ga4gh_workflow_2022,
  title = {Workflow {Execution} {Service}},
  url = {https://ga4gh.github.io/workflow-execution-service-schemas/docs/},
  urldate = {2022-09-12},
  author = {GA4GH},
  year = {2022},
}

@misc{ga4gh_welcome_2022,
  title = {Welcome to the technical documentation for the {GA4GH} {Pedigree} {Standard}! — 0.1 documentation},
  url = {https://pedigree.readthedocs.io/en/latest/},
  urldate = {2022-09-12},
  author = {GA4GH},
  year = {2022},
}

% Corporate author: the extra brace pair keeps the two-word name as one unit,
% so it is not split into first name "FAIRsharing" and surname "team".
@misc{fairsharing_team_fairsharing_2022,
  title = {{FAIRsharing} {\textbar} {Home}},
  url = {https://fairsharing.org/},
  urldate = {2022-09-01},
  author = {{FAIRsharing team}},
  year = {2022},
}

@misc{ga4gh_ga4gh_2022,
  title = {{GA4GH} {DRS} {Client} {Documentation} — {GA4GH} {DRS} {Client} documentation},
  url = {https://ga4gh-drs-client.readthedocs.io/en/latest/},
  urldate = {2022-09-12},
  author = {GA4GH},
  year = {2022},
}

@misc{fhir_summary_2022,
  title = {Summary - {FHIR} v4.3.0},
  url = {https://www.hl7.org/fhir/summary.html},
  urldate = {2022-09-12},
  author = {FHIR},
  year = {2022},
}

@misc{ohdsi_omop_2022,
  title = {{OMOP} {Common} {Data} {Model} – {OHDSI}},
  url = {https://www.ohdsi.org/data-standardization/the-common-data-model/},
  language = {en-US},
  urldate = {2022-09-12},
  author = {OHDSI},
  year = {2022},
}

@misc{ga4gh_github_2021,
  title = {{GitHub} - {EBISPOT}/{DUO}: {Ontology} for consent codes and data use requirements},
  url = {https://github.com/EBISPOT/DUO},
  urldate = {2021-10-19},
  author = {GA4GH},
  year = {2021},
}

@misc{ga4gh_data-securityaaiconnectprofilemd_2021, title = {data-security/{AAIConnectProfile}.md at master · ga4gh/data-security · {GitHub}}, url = {https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md#abstract}, urldate = {2021-10-19},
author = {GA4GH}, year = {2021}, }

@misc{embl-ebi_embl-ebi_2021,
  title = {{EMBL}-{EBI} {Impact} report 2021 – {EMBL} {Documents}},
  url = {https://www.embl.org/documents/document/embl-ebi-impact-report-2021/},
  urldate = {2021-10-25},
  author = {EMBL-EBI},
  year = {2021},
}

% Paper in the MEDINFO 2021 proceedings volume. Typed as inproceedings, with the
% proceedings title in booktitle and IOS Press in publisher; the export had the
% proceedings title in journal and the publisher inside a free-text note.
@inproceedings{martinez-garcia_fairness_2022,
  title = {{FAIRness} for {FHIR}: {Towards} {Making} {Health} {Datasets} {FAIR} {Using} {HL7} {FHIR}},
  shorttitle = {{FAIRness} for {FHIR}},
  url = {https://ebooks.iospress.nl/doi/10.3233/SHTI220024},
  doi = {10.3233/SHTI220024},
  urldate = {2022-09-12},
  booktitle = {MEDINFO 2021: One World, One Health – Global Partnership for Digital Innovation},
  author = {Martínez-García, Alicia and Cangioli, Giorgio and Chronaki, Catherine and Löbe, Matthias and Beyan, Oya and Juehne, Anthony and Parra-Calderón, Carlos Luis},
  year = {2022},
  publisher = {IOS Press},
  pages = {22--26},
}

@misc{ga4gh_ga4gh_2022-1,
  title = {{GA4GH} {WES} {API} enables portable genomic analysis},
  url = {https://www.ga4gh.org/news/ga4gh-wes-api-enables-portable-genomic-analysis/},
  urldate = {2022-09-12},
  author = {GA4GH},
  year = {2022},
}

% Corporate author: double braces keep "Springer Nature" from being parsed as
% first name "Springer", surname "Nature".
@misc{springer_nature_mandated_2022,
  title = {Mandated data types {\textbar} {Authors} {\textbar} {Springer} {Nature}},
  url = {https://www.springernature.com/gp/authors/research-data-policy/repositories-socsci/19540364},
  urldate = {2022-08-31},
  author = {{Springer Nature}},
  year = {2022},
}

@misc{cineca_assigning_2021,
  title = {Assigning standard descriptors to free text — {CINECA} - {Common} {Infrastructure} for {National} {Cohorts} in {Europe}, {Canada}, and {Africa}},
  url = {https://www.cineca-project.eu/blog-all/assigning-standard-descriptors-to-free-text},
  urldate = {2021-10-19},
  author = {CINECA},
  year = {2021},
}

@misc{cineca_lexmapr_2021, title = {{LexMapr} - {A} rule-based text-mining tool for ontology term mapping and classification — {CINECA} - {Common} {Infrastructure} for {National} {Cohorts} in {Europe}, {Canada}, and {Africa}}, url =
{https://www.cineca-project.eu/blog-all/lexmapr-a-rule-based-text-mining-tool-for-ontology-term-mapping-and-classification}, urldate = {2021-10-19}, author = {CINECA}, year = {2021}, }

@misc{cineca_uncovering_2021,
  title = {Uncovering metadata from semi-structured cohort data — {CINECA} - {Common} {Infrastructure} for {National} {Cohorts} in {Europe}, {Canada}, and {Africa}},
  url = {https://www.cineca-project.eu/blog-all/uncovering-metadata-from-semi-structured-cohort-data},
  urldate = {2021-10-19},
  author = {CINECA},
  year = {2021},
}

% Corporate author: double braces keep "Wellcome Trust" as a single name
% instead of first name "Wellcome", surname "Trust".
@misc{wellcome_trust_data_2017,
  title = {Data, software and materials management and sharing policy - {Grant} {Funding}},
  url = {https://wellcome.org/grant-funding/guidance/data-software-materials-management-and-sharing-policy},
  abstract = {Read how researchers should manage and share data, software and materials that arise from Wellcome-funded research, and outputs management plans.},
  urldate = {2021-10-18},
  journal = {Wellcome},
  author = {{Wellcome Trust}},
  year = {2017},
}

@misc{nhmrc_australian_2018,
  title = {Australian {Code} for the {Responsible} {Conduct} of {Research}, 2018 {\textbar} {NHMRC}},
  url = {https://www.nhmrc.gov.au/about-us/publications/australian-code-responsible-conduct-research-2018},
  urldate = {2021-10-25},
  author = {NHMRC},
  year = {2018},
}

% Corporate author double-braced so the university name is not split into
% given names and a surname; abstract typo "sensitve" corrected.
@misc{nanyang_technological_university_libguides_2022,
  title = {{LibGuides}: {Research} {Data} {Management}: {Sensitive} {Data}},
  copyright = {Copyright Nanyang Technological University 2022},
  shorttitle = {{LibGuides}},
  url = {https://libguides.ntu.edu.sg/rdm/sensitivedata},
  abstract = {Working with sensitive data},
  language = {en},
  urldate = {2022-08-23},
  author = {{Nanyang Technological University}},
  year = {2022},
}

@misc{openaire_20200429_openaire_2020,
  title = {20200429\_OpenAIRE {Legal} {Policy} {Webinar} for researchers - {YouTube}},
  url = {https://www.youtube.com/watch?v=85x6Rigl2-o},
  urldate = {2022-08-23},
  author = {OpenAIRE},
  year = {2020},
}

@misc{ga4gh_drs_2022,
title = {{DRS} {API}: {Enabling} {Cloud}-{Based} {Data} {Access} and {Retrieval}}, url = {https://www.ga4gh.org/news/drs-api-enabling-cloud-based-data-access-and-retrieval/}, urldate = {2022-09-12}, author = {GA4GH}, year = {2022}, }

@misc{ga4gh_data_2022,
  title = {Data {Repository} {Service}},
  url = {https://ga4gh.github.io/data-repository-service-schemas/preview/release/drs-1.0.0/docs/},
  urldate = {2022-09-12},
  author = {GA4GH},
  year = {2022},
}

% URL: dropped the dangling empty fragment marker (a bare trailing hash) that
% the export carried over; it pointed at the same page and is a TeX special.
@misc{ena_ena_2022,
  title = {{ENA}: {Guidelines} and {Tutorials} — {ENA} {Training} {Modules} 1 documentation},
  url = {https://ena-docs.readthedocs.io/en/latest/},
  urldate = {2022-08-24},
  author = {ENA},
  year = {2022},
}

@misc{cncb-ngdc_database_2022,
  title = {Database {Resources} of the {National} {Genomics} {Data} {Center}, {China} {National} {Center} for {Bioinformation} in 2022. - {Abstract} - {Europe} {PMC}},
  url = {https://europepmc.org/article/MED/34718731},
  urldate = {2022-08-16},
  author = {CNCB-NGDC},
  year = {2022},
}

% Abstract: restored the spaces between sentences that the web scrape had fused.
@misc{alrc_essentially_2003,
  title = {Essentially {Yours}: {The} {Protection} of {Human} {Genetic} {Information} in {Australia} ({ALRC} {Report} 96)},
  shorttitle = {Essentially {Yours}},
  url = {https://www.alrc.gov.au/publication/essentially-yours-the-protection-of-human-genetic-information-in-australia-alrc-report-96/},
  abstract = {ALRC Report 96 (tabled May 2003) was the product of a two-year inquiry by the ALRC and the Australian Health Ethics Committee (AHEC) of the NHMRC, involving extensive research and widespread public consultation. The inquiry was the most comprehensive ever undertaken into these issues in Australia or overseas. The report covers an extensive range of activities in ...},
  language = {en-AU},
  urldate = {2022-09-05},
  journal = {ALRC},
  author = {ALRC},
  year = {2003},
}

@book{observational_health_data_sciences_and_informatics_chapter_2021, title = {Chapter 4 {The} {Common} {Data} {Model} {\textbar} {The} {Book} of {OHDSI}}, url = {https://ohdsi.github.io/TheBookOfOhdsi/},
abstract = {A book about the Observational Health Data Sciences and Informatics (OHDSI). It described the OHDSI community, open standards and open source software.}, urldate = {2022-09-12}, author = {{Observational Health Data Sciences and Informatics}}, month = nov, year = {2021}, }

% Entry closed above: its corporate author is now wrapped in a second brace pair,
% so the whole organisation name is one unit (bracing only the word "and" stopped
% the author split but still left "Informatics" parsed as the surname).

@misc{ega_prepare_2022,
  title = {Prepare {XMLs} - {EGA} {European} {Genome}-{Phenome} {Archive}},
  url = {https://ega-archive.org/submission/sequence/programmatic_submissions/prepare_xml},
  urldate = {2022-08-29},
  author = {EGA},
  year = {2022},
}

@misc{cncb-ngdc_genome_2019,
  title = {Genome {Sequence} {Archive} for {Human} - {Policies}},
  url = {https://ngdc.cncb.ac.cn/gsa-human/policy/policy.jsp#responsibilitiesSubmitter},
  urldate = {2022-08-26},
  author = {CNCB-NGDC},
  year = {2019},
  keywords = {DAC, GSA, controlled access, human},
}

@misc{nhmrc_national_2018,
  title = {National {Statement} on {Ethical} {Conduct} in {Human} {Research} (2007) - {Updated} 2018 {\textbar} {NHMRC}},
  url = {https://www.nhmrc.gov.au/about-us/publications/national-statement-ethical-conduct-human-research-2007-updated-2018},
  urldate = {2021-10-26},
  author = {NHMRC},
  year = {2018},
}

% Three corporate authors. The multi-word organisation names are each braced so
% they are treated as single names rather than given-name/surname pairs.
% The report type is capitalised because styles print it verbatim.
@techreport{nhmrc_management_2019,
  address = {Canberra},
  type = {Guide},
  title = {Management of {Data} and {Information} in {Research}: {A} guide supporting the {Australian} {Code} for the {Responsible} {Conduct} of {Research}},
  url = {https://www.nhmrc.gov.au/sites/default/files/documents/attachments/Management-of-Data-and-Information-in-Research.pdf},
  number = {R41B},
  urldate = {2021-10-25},
  institution = {Commonwealth of Australia},
  author = {NHMRC and {Australian Research Council} and {Universities Australia}},
  year = {2019},
}

@misc{cruk_data_2014, title = {Data sharing guidelines}, url = {https://www.cancerresearchuk.org/funding-for-researchers/applying-for-funding/policies-that-affect-your-grant/submission-of-a-data-sharing-and-preservation-strategy/data-sharing-guidelines}, abstract = {At CRUK, we are committed to ensuring that the
data generated through its funding should be put to maximum use by the cancer research community and, whenever possible, is translated to deliver patient benefit. It is therefore our policy that all data generated as a result of our funding be considered for sharing and made as widely and freely accessible as possible whilst safeguarding intellectual property, the privacy of patients and confidential data. Researchers applying for funding should familiarise themselves with our Data Sharing and Preservation Policy.}, urldate = {2021-10-18}, journal = {Cancer Research UK}, author = {CRUK}, month = mar, year = {2014}, }

% Key was ga4gh_ga4gh_2022, which is already taken earlier in this file by the
% GA4GH DRS Client documentation entry. BibTeX rejects a repeated key and drops
% the later entry, so this one could never be cited; it now has a unique key.
% NOTE(review): urldate (2021-10-19) is earlier than year (2022) -- one of the
% two is probably wrong; confirm against the page.
@misc{ga4gh_passports_2022,
  title = {{GA4GH} {Passports} and the {Authorization} and {Authentication} {Infrastructure}},
  url = {https://www.ga4gh.org/news/ga4gh-passports-and-the-authorization-and-authentication-infrastructure/},
  urldate = {2021-10-19},
  author = {GA4GH},
  year = {2022},
}

% Key was ga4gh_data_2022, which is already taken earlier in this file by the
% Data Repository Service documentation entry (same repeated-key problem).
% This entry is the GitHub schema repository, hence the new key.
@misc{ga4gh_drs_schemas_2022,
  title = {Data {Repository} {Service} ({DRS}) {API}},
  copyright = {Apache-2.0},
  url = {https://github.com/ga4gh/data-repository-service-schemas},
  abstract = {A repository for the schemas used for the Data Repository Service.},
  urldate = {2022-09-12},
  publisher = {Global Alliance for Genomics and Health},
  author = {GA4GH},
  month = jul,
  year = {2022},
  note = {original-date: 2017-02-27T03:52:32Z},
}

@misc{oliveira_portuguese_2022, title = {The {Portuguese} {Local} {European} {Genome}-{Phenome} {Archive} ({EGA})}, url = {https://zenodo.org/record/7186542}, abstract = {BioData.pt has been participating in EGA related projects since 2018, namely through the ELIXIR Beacon Community, Federated Human Data Community and ELIXIR-CONVERGE WP7. We have been early adopters of Beacon and Federated EGA.
All of these projects align with National Initiatives, namely the mirror groups of the Beyond One Million Genomes (B1MG) project and the Portuguese Initiative for Genomic Medicine National strategy, led by the National Institute of Health, with whom we have been collaborating closely, specially in the technical aspects of these projects. Both in ELIXIR and B1MG we participated in pilot maturity/maturation models, aiming to access the level of participating nodes.}, language = {eng}, urldate = {2022-11-03}, author = {Oliveira, Jorge S. and Silva, Fernando Mira da and Oliveira, José Luís and Silva, Mário and Melo, Ana M. P.}, month = oct, year = {2022}, doi = {10.5281/zenodo.7186542}, keywords = {GDPR, data management, european genome-phenome archive, genomic data, health data, human data}, } @misc{brandt_characterizing_2022, title = {Characterizing {Variability} of {EHR}-{Driven} {Phenotype} {Definitions}}, copyright = {© 2022, Posted by Cold Spring Harbor Laboratory. The copyright holder for this pre-print is the author. All rights reserved. The material may not be redistributed, re-used or adapted without the author's permission.}, url = {https://www.medrxiv.org/content/10.1101/2022.07.10.22277390v1}, doi = {10.1101/2022.07.10.22277390}, abstract = {Objective Analyze a publicly available sample of rule-based phenotype definitions to characterize and evaluate the types of logical constructs used. Materials \& Methods A sample of 33 phenotype definitions used in research and published to the Phenotype KnowledgeBase (PheKB), that are represented using Fast Healthcare Interoperability Resources (FHIR) and Clinical Quality Language (CQL) was analyzed using automated analysis of the computable representation of the CQL libraries. Results Most of the phenotype definitions include narrative descriptions and flowcharts, while few provide pseudocode or executable artifacts. Most use 4 or fewer medical terminologies. 
The number of codes used ranges from 5 to 6865, and value sets from 1 to 19. We found the most common expressions used were literal, data, and logical expressions. Aggregate and arithmetic expressions are the least common. Expression depth ranges from 4 to 27. Discussion Despite the range of conditions, we found that all of the phenotype definitions consisted of logical criteria, representing both clinical and operational logic, and tabular data, consisting of codes from standard terminologies and keywords for natural language processing. The total number and variety of expressions is low, which may be to simplify implementation, or authors may limit complexity due to data availability constraints. Conclusion The phenotypes analyzed show significant variation in specific logical, arithmetic and other operators, but are all composed of the same high-level components, namely tabular data and logical expressions. A standard representation for phenotype definitions should support these formats and be modular to support localization and shared logic.}, language = {en}, urldate = {2022-11-03}, publisher = {medRxiv}, author = {Brandt, Pascal S. and Kho, Abel and Luo, Yuan and Pacheco, Jennifer A. and Walunas, Theresa L. and Hakonarson, Hakon and Hripcsak, George and Liu, Cong and Shang, Ning and Weng, Chunhua and Walton, Nephi and Carrell, David S. and Crane, Paul K. and Larson, Eric and Chute, Christopher G. and Kullo, Iftikhar and Carroll, Robert and Denny, Josh and Ramirez, Andrea and Wei, Wei-Qi and Pathak, Jyoti and Wiley, Laura K. and Richesson, Rachel and Starren, Justin B. 
and Rasmussen, Luke V.}, month = jul, year = {2022}, note = {ISSN: 2227-7390 Pages: 2022.07.10.22277390}, }

% Project report deposited on Zenodo. Typed as misc because the entry has no
% journal field, which an article entry requires (BibTeX warns about the
% missing field otherwise). All fields are unchanged.
@misc{horgan_b1mg_2022, title = {{B1MG} {D1}.5 {Stakeholders} trust in genomic data sharing landscape analysis}, url = {https://zenodo.org/record/6382431}, doi = {10.5281/zenodo.6382431}, abstract = {This scoping report represents a mission-oriented approach to supporting citizens' engagement and public trust in genomic data sharing, scoping out and suggesting possible approaches. It recognises from the outset that the issues of engagement and trust extend beyond the indispensable involvement of citizens to embrace a much wider range of stakeholders whose buy-in will also be essential to success. This review takes account of the B1MG and 1+MG, and aims to offer conceptual and practical steps for building on those achievements to bridge potential and actual benefits to science and society with a specific focus on citizens.   The rapid progress that has been made in genomics over the past two decades has created much debate (Boccia, 20141). On the one hand, genomics has the potential to deliver earlier diagnosis, more effective prevention programmes and more precise targeting of therapies, in some cases challenging our understanding of the nature of certain diseases. On the other hand, it raises a range of ethical, social and legal challenges (ELSI), including among other issues, protection and ownership of data, the need for care in interpreting data, potential misuse of data by commercial organisations, especially insurance companies, and questions about autonomy and the potential for stigma (National Research Council, 1988). As insights from genomics are increasingly used in clinical settings to inform personalised medicine, these ELSI considerations have been broadened, with many concerned this will widen existing inequalities in health care (Brothers \& Rothstein, 20152).
While genetic testing may improve disease prediction, diagnosis, and treatment, the rapid uptake and application of genetics and genomics raise numerous ethical, legal, and social issues (ELSI). One of the most prominent among these is the growing number of possibilities of using genetic information to justify treating individuals differently or profiling specific population groups that may lead to genetic discrimination (GD) (Kim, 20213). The EU’s General Data Protection Regulation (GDPR) is an important regulatory change at the EU level that has significant implications for the processing of genomic data in research and clinical practice. Recital 51 of the GDPR designates personal data which is, by its nature, particularly sensitive in relation to fundamental rights and freedoms as requiring specific protection, as the context of its processing could create significant risks to those fundamental rights and freedoms. Beside data generation and processing, ensuring data privacy and security requires robust infrastructure, training, as well as careful regulation of access. To capture the full benefits of genomic data, it must be shared with multiple actors, including: researchers to support academic and clinical research; health providers to support delivery of health services and public health activities; and commercial organisations involved in developing and implementing new health technologies or delivering health care services (Williams et al, 2021 4). Data sharing to facilitate greater genomic research and translation of findings into clinical use relies on the implementation of advanced technological solutions, health workers with the right skills and training to contribute to implementation, the active involvement of citizens and patients that support translation, and implementation of strong regulatory and governance procedures (Raza \& Hall, 20175). 
In this context, it is unavoidable to mention the concurrent COVID-19 pandemic, in view of the evolution of public awareness of science, as hopes have grown for vaccines and treatments to counter COVID-19. But this enhanced awareness carries corresponding responsibilities alongside the obvious potential benefits for mustering support for science. The pace of change, already increasingly rapid as science opens ever more doors to understanding of health, disease, diagnosis and treatment, has received dramatic new impetus through the exigencies of the COVID 19. Never in modern times has the population of the planet been subject to such a direct threat to health, and never have science and technology responded with such alacrity, energy and effectiveness. As the European Council secretariat explains in updating on progress towards an international agreement on pandemics, "the sharing of pathogens, biological samples and genomic data as well as the development of timely medical solutions (vaccines, treatments and diagnostics) are vital in order to enhance global pandemic preparedness." (https://www.consilium.europa.eu/en/policies/coronavirus/pandemic-treaty/) The corresponding leap in citizens' awareness and expectations of the world of science is however presenting a new dilemma: while health policy is encouraging and promoting (and indeed very largely depending on) science to solve the unprecedented challenge that COVID 19 presents, it also has to cater to the importance of bringing society along with developments, to foster understanding and to counter scepticism, suspicion, doubt and even hostility. This concern informs the European Parliament resolution on strengthening the fight against cancer, adopted by the plenary on 16 February 2022. 
It "encourages the Commission and the Member States to promote the knowledge of cancer biology through the implementation of genomics and informatics infrastructures; urges all implementation partners to be ever mindful of the principles of data privacy and security, trust, transparency, patient centricity and patient involvement at all times." (https://www.europarl.europa.eu/doceo/document/TA-9-2022-0038\_EN.html) Against this background, it is prudent to consider the risk that the application of genome technology to healthcare, despite all its multiple potential benefits as a breakthrough in healthcare, could be confronted with similar turbulence and even negative opinion unless meticulous attention is paid to the challenges of gaining public understanding. Public understanding is vital in itself to gain trust on this data acquisition – but it is also an indispensable condition for obtaining the policy support that genomic medicine demands for implementing this ambitious project. At the heart of the 1+MG/B1MG concept is the collection and exchange of citizens' data. It is consequently implicit in the logic of the exercise that the interests of the citizens – whose data is to be collected and exchanged – should receive priority attention, even while solutions are found to the myriad technical, scientific, legal and political issues that the project also raises. 
This paper brings focus to this dilemma in European terms, scoping out both the potential and the challenges, and suggesting some approaches that can improve the chances of successful conciliation and mutual understanding between science and citizens – with the attendant benefits accruing to society as a whole.}, language = {eng}, urldate = {2022-10-27}, author = {Horgan, Denis and Boccia, Stefania and Becker, Regina and Scollen, Serena and Merchant, Arshiya and van El, Carla and Hoxaj, Ilda and Sassano, Michele and Pezzullo, Angelo and Julkowska, Daria}, month = mar, year = {2022}, note = {Publisher: Zenodo}, keywords = {1 Million Genomes Initiative, 1+MG, 1+MG Stakeholders, B1MG, Beyond 1 Million Genomes, citizens' engagement, genomic data, stakeholders' trust}, }

% Title: trailing period removed (the style adds its own punctuation, so the
% stored period came out doubled). URL: uses the current doi.org resolver
% instead of the legacy dx.doi.org host; the DOI itself is unchanged.
@article{odoherty_toward_2021,
  title = {Toward better governance of human genomic data},
  volume = {53},
  url = {https://doi.org/10.1038/s41588-020-00742-6},
  doi = {10.1038/s41588-020-00742-6},
  number = {1},
  urldate = {2022-01-28},
  journal = {Nature Genetics},
  author = {O'Doherty, Kieran C and Shabani, Mahsa and Dove, Edward S and Bentzen, Heidi Beate and Borry, Pascal and Burgess, Michael M and Chalmers, Don and De Vries, Jantina and Eckstein, Lisa and Fullerton, Stephanie M and Juengst, Eric and Kato, Kazuto and Kaye, Jane and Knoppers, Bartha Maria and Koenig, Barbara A and Manson, Spero M and McGrail, Kimberlyn M and McGuire, Amy L and Meslin, Eric M and Nicol, Dianne and Prainsack, Barbara and Terry, Sharon F and Thorogood, Adrian and Burke, Wylie},
  year = {2021},
  keywords = {Genetics, Genomics, consent, legal, privacy, sharing},
  pages = {2--8},
}

@article{gruendner_architecture_2022, title = {The {Architecture} of a {Feasibility} {Query} {Portal} for {Distributed} {COVID}-19 {Fast} {Healthcare} {Interoperability} {Resources} ({FHIR}) {Patient} {Data} {Repositories}: {Design} and {Implementation} {Study}}, volume = {10}, copyright = {cc by}, issn = {2291-9694}, shorttitle = {The {Architecture} of
a {Feasibility} {Query} {Portal} for {Distributed} {COVID}-19 {Fast} {Healthcare} {Interoperability} {Resources} ({FHIR}) {Patient} {Data} {Repositories}}, url = {https://europepmc.org/articles/PMC9135115}, doi = {10.2196/36709}, abstract = {BackgroundAn essential step in any medical research project after identifying the research question is to determine if there are sufficient patients available for a study and where to find them. Pursuing digital feasibility queries on available patient data registries has proven to be an excellent way of reusing existing real-world data sources. To support multicentric research, these feasibility queries should be designed and implemented to run across multiple sites and securely access local data. Working across hospitals usually involves working with different data formats and vocabularies. Recently, the Fast Healthcare Interoperability Resources (FHIR) standard was developed by Health Level Seven to address this concern and describe patient data in a standardized format. The Medical Informatics Initiative in Germany has committed to this standard and created data integration centers, which convert existing data into the FHIR format at each hospital. This partially solves the interoperability problem; however, a distributed feasibility query platform for the FHIR standard is still missing.ObjectiveThis study described the design and implementation of the components involved in creating a cross-hospital feasibility query platform for researchers based on FHIR resources. This effort was part of a large COVID-19 data exchange platform and was designed to be scalable for a broad range of patient data.MethodsWe analyzed and designed the abstract components necessary for a distributed feasibility query. 
This included a user interface for creating the query, backend with an ontology and terminology service, middleware for query distribution, and FHIR feasibility query execution service. Results: We implemented the components described in the Methods section. The resulting solution was distributed to 33 German university hospitals. The functionality of the comprehensive network infrastructure was demonstrated using a test data set based on the German Corona Consensus Data Set. A performance test using specifically created synthetic data revealed the applicability of our solution to data sets containing millions of FHIR resources. The solution can be easily deployed across hospitals and supports feasibility queries, combining multiple inclusion and exclusion criteria using standard Health Level Seven query languages such as Clinical Quality Language and FHIR Search. Developing a platform based on multiple microservices allowed us to create an extendable platform and support multiple Health Level Seven query languages and middleware components to allow integration with future directions of the Medical Informatics Initiative. Conclusions: We designed and implemented a feasibility platform for distributed feasibility queries, which works directly on FHIR-formatted data and distributed it across 33 university hospitals in Germany. 
We showed that developing a feasibility platform directly on the FHIR standard is feasible.}, language = {eng}, number = {5}, urldate = {2022-09-12}, journal = {JMIR Medical Informatics}, author = {Gruendner, Julian and Deppenwiese, Noemi and Folz, Michael and Köhler, Thomas and Kroll, Björn and Prokosch, Hans-Ulrich and Rosenau, Lorenz and Rühle, Mathias and Scheidl, Marc-Anton and Schüttler, Christina and Sedlmayr, Brita and Twrdik, Alexander and Kiel, Alexander and Majeed, Raphael W}, month = may, year = {2022}, pmid = {35486893}, pmcid = {PMC9135115}, keywords = {Consensus Data Set, Covid-19, Cql, Distributed Analysis, Fast Healthcare Interoperability Resources, Feasibility Study, Federated Feasibility Queries, Fhir, Fhir Search, Health Data, Hl7 Fhir, Medical Informatics, Pandemic, Patient Data, Query}, pages = {e36709}, } @article{duda_hl7_2022, title = {{HL7} {FHIR}-based tools and initiatives to support clinical research: a scoping review}, volume = {29}, issn = {1527-974X}, shorttitle = {{HL7} {FHIR}-based tools and initiatives to support clinical research}, url = {https://doi.org/10.1093/jamia/ocac105}, doi = {10.1093/jamia/ocac105}, abstract = {Objectives: The HL7® fast healthcare interoperability resources (FHIR®) specification has emerged as the leading interoperability standard for the exchange of healthcare data. We conducted a scoping review to identify trends and gaps in the use of FHIR for clinical research. Materials and methods: We reviewed published literature, federally funded project databases, application websites, and other sources to discover FHIR-based papers, projects, and tools (collectively, "FHIR projects") available to support clinical research activities. Results: Our search identified 203 different FHIR projects applicable to clinical research. 
Most were associated with preparations to conduct research, such as data mapping to and from FHIR formats (n = 66, 32.5\%) and managing ontologies with FHIR (n = 30, 14.8\%), or post-study data activities, such as sharing data using repositories or registries (n = 24, 11.8\%), general research data sharing (n = 23, 11.3\%), and management of genomic data (n = 21, 10.3\%). With the exception of phenotyping (n = 19, 9.4\%), fewer FHIR-based projects focused on needs within the clinical research process itself. Discussion: Funding and usage of FHIR-enabled solutions for research are expanding, but most projects appear focused on establishing data pipelines and linking clinical systems such as electronic health records, patient-facing data systems, and registries, possibly due to the relative newness of FHIR and the incentives for FHIR integration in health information systems. Fewer FHIR projects were associated with research-only activities. Conclusion: The FHIR standard is becoming an essential component of the clinical research enterprise. 
To develop FHIR's full potential for clinical research, funding and operational stakeholders should address gaps in FHIR-based research tools and methods.}, language = {eng}, number = {9}, urldate = {2022-09-12}, journal = {Journal of the American Medical Informatics Association}, author = {Duda, Stephany N and Kennedy, Nan and Conway, Douglas and Cheng, Alex C and Nguyen, Viet and Zayas-Cabán, Teresa and Harris, Paul A}, month = aug, year = {2022}, pmid = {35818340}, keywords = {Electronic Health Records, Fast Healthcare Interoperability Resources (Fhir), Health Information Interoperability, Health Information Management, data management}, pages = {1642--1653}, } @misc{alper_data_2020, title = {Data {Protection} in {Biomedical} {Research}}, url = {https://zenodo.org/record/5078280}, abstract = {Slides for the data protection session of the training "Best practices in research data management and stewardship" held on 15 June 2021 by ELIXIR-Luxembourg.}, language = {eng}, urldate = {2022-08-19}, author = {Alper, Pinar}, month = jan, year = {2020}, doi = {10.5281/zenodo.5078280}, keywords = {biomedical research, compliance, data protection, gdpr}, } @article{davis_geoquery_2007, title = {{GEOquery}: a bridge between the {Gene} {Expression} {Omnibus} ({GEO}) and {BioConductor}}, volume = {23}, issn = {1367-4803, 1460-2059}, shorttitle = {{GEOquery}}, url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btm254}, doi = {10.1093/bioinformatics/btm254}, abstract = {Microarray technology has become a standard molecular biology tool. Experimental data have been generated on a huge number of organisms, tissue types, treatment conditions and disease states. The Gene Expression Omnibus ( Barrett et al., 2005), developed by the National Center for Bioinformatics (NCBI) at the National Institutes of Health is a repository of nearly 140 000 gene expression experiments. 
The BioConductor project (Gentleman et al., 2004) is an open-source and open-development software project built in the R statistical programming environment (R Development core Team, 2005) for the analysis and comprehension of genomic data. The tools contained in the BioConductor project represent many state-of-the-art methods for the analysis of microarray and genomics data. We have developed a software tool that allows access to the wealth of information within GEO directly from BioConductor, eliminating many the formatting and parsing problems that have made such analyses labor-intensive in the past. The software, called GEOquery, effectively establishes a bridge between GEO and BioConductor. Easy access to GEO data from BioConductor will likely lead to new analyses of GEO data using novel and rigorous statistical and bioinformatic tools. Facilitating analyses and metaanalyses of microarray data will increase the efficiency with which biologically important conclusions can be drawn from published genomic data.}, language = {en}, number = {14}, urldate = {2022-09-09}, journal = {Bioinformatics}, author = {Davis, Sean and Meltzer, Paul S.}, month = jul, year = {2007}, pages = {1846--1847}, } @article{zappia_exploring_2018, title = {Exploring the single-cell {RNA}-seq analysis landscape with the {scRNA}-tools database}, volume = {14}, issn = {1553-7358}, url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006245}, doi = {10.1371/journal.pcbi.1006245}, abstract = {As single-cell RNA-sequencing (scRNA-seq) datasets have become more widespread the number of tools designed to analyse these data has dramatically increased. Navigating the vast sea of tools now available is becoming increasingly challenging for researchers. In order to better facilitate selection of appropriate analysis tools we have created the scRNA-tools database (www.scRNA-tools.org) to catalogue and curate analysis tools as they become available. 
Our database collects a range of information on each scRNA-seq analysis tool and categorises them according to the analysis tasks they perform. Exploration of this database gives insights into the areas of rapid development of analysis methods for scRNA-seq data. We see that many tools perform tasks specific to scRNA-seq analysis, particularly clustering and ordering of cells. We also find that the scRNA-seq community embraces an open-source and open-science approach, with most tools available under open-source licenses and preprints being extensively used as a means to describe methods. The scRNA-tools database provides a valuable resource for researchers embarking on scRNA-seq analysis and records the growth of the field over time.}, language = {en}, number = {6}, urldate = {2022-09-13}, journal = {PLOS Computational Biology}, author = {Zappia, Luke and Phipson, Belinda and Oshlack, Alicia}, month = jun, year = {2018}, note = {Publisher: Public Library of Science}, keywords = {Data visualization, Database and informatics methods, Gene expression, Gene sequencing, Open source software, Programming languages, Software tools, Source code}, pages = {e1006245}, } @article{jacobsen_ga4gh_2022, title = {The {GA4GH} {Phenopacket} schema defines a computable representation of clinical data}, volume = {40}, copyright = {2022 The Author(s), under exclusive licence to Springer Nature America, Inc.}, issn = {1546-1696}, url = {https://www.nature.com/articles/s41587-022-01357-4}, doi = {10.1038/s41587-022-01357-4}, language = {en}, number = {6}, urldate = {2022-09-12}, journal = {Nature Biotechnology}, author = {Jacobsen, Julius O. B. and Baudis, Michael and Baynam, Gareth S. and Beckmann, Jacques S. and Beltran, Sergi and Buske, Orion J. and Callahan, Tiffany J. and Chute, Christopher G. and Courtot, Mélanie and Danis, Daniel and Elemento, Olivier and Essenwanger, Andrea and Freimuth, Robert R. and Gargano, Michael A. and Groza, Tudor and Hamosh, Ada and Harris, Nomi L. 
and Kaliyaperumal, Rajaram and Lloyd, Kevin C. Kent and Khalifa, Aly and Krawitz, Peter M. and Köhler, Sebastian and Laraway, Brian J. and Lehväslaiho, Heikki and Matalonga, Leslie and McMurry, Julie A. and Metke-Jimenez, Alejandro and Mungall, Christopher J. and Munoz-Torres, Monica C. and Ogishima, Soichi and Papakonstantinou, Anastasios and Piscia, Davide and Pontikos, Nikolas and Queralt-Rosinach, Núria and Roos, Marco and Sass, Julian and Schofield, Paul N. and Seelow, Dominik and Siapos, Anastasios and Smedley, Damian and Smith, Lindsay D. and Steinhaus, Robin and Sundaramurthi, Jagadish Chandrabose and Swietlik, Emilia M. and Thun, Sylvia and Vasilevsky, Nicole A. and Wagner, Alex H. and Warner, Jeremy L. and Weiland, Claus and Haendel, Melissa A. and Robinson, Peter N.}, month = jun, year = {2022}, note = {Number: 6 Publisher: Nature Publishing Group}, keywords = {Computational biology and bioinformatics, Translational research}, pages = {817--820}, } @article{chatterjee_hl7_2022, title = {{HL7} {FHIR} with {SNOMED}-{CT} to {Achieve} {Semantic} and {Structural} {Interoperability} in {Personal} {Health} {Data}: {A} {Proof}-of-{Concept} {Study}}, volume = {22}, copyright = {http://creativecommons.org/licenses/by/3.0/}, issn = {1424-8220}, shorttitle = {{HL7} {FHIR} with {SNOMED}-{CT} to {Achieve} {Semantic} and {Structural} {Interoperability} in {Personal} {Health} {Data}}, url = {https://www.mdpi.com/1424-8220/22/10/3756}, doi = {10.3390/s22103756}, abstract = {Heterogeneity is a problem in storing and exchanging data in a digital health information system (HIS) following semantic and structural integrity. The existing literature shows different methods to overcome this problem. 
Fast healthcare interoperable resources (FHIR) as a structural standard may explain other information models, (e.g., personal, physiological, and behavioral data from heterogeneous sources, such as activity sensors, questionnaires, and interviews) with semantic vocabularies, (e.g., Systematized Nomenclature of Medicine—Clinical Terms (SNOMED-CT)) to connect personal health data to an electronic health record (EHR). We design and develop an intuitive health coaching (eCoach) smartphone application to prove the concept. We combine HL7 FHIR and SNOMED-CT vocabularies to exchange personal health data in JavaScript object notion (JSON). This study explores and analyzes our attempt to design and implement a structurally and logically compatible tethered personal health record (PHR) that allows bidirectional communication with an EHR. Our eCoach prototype implements most PHR-S FM functions as an interoperability quality standard. Its end-to-end (E2E) data are protected with a TSD (Services for Sensitive Data) security mechanism. We achieve 0\% data loss and 0\% unreliable performances during data transfer between PHR and EHR. 
Furthermore, this experimental study shows the effectiveness of FHIR modular resources toward flexible management of data components in the PHR (eCoach) prototype.}, language = {en}, number = {10}, urldate = {2022-09-12}, journal = {Sensors}, author = {Chatterjee, Ayan and Pahari, Nibedita and Prinz, Andreas}, month = may, year = {2022}, note = {Number: 10 Publisher: Multidisciplinary Digital Publishing Institute}, keywords = {FHIR, HL7, PGHD, PHR, PHR-S FM, SNOMED-CT, TSD, eCoach, interoperability}, pages = {3756}, } @article{grimes_pathling_2022, title = {Pathling: analytics on {FHIR}}, volume = {13}, issn = {2041-1480}, shorttitle = {Pathling}, url = {https://doi.org/10.1186/s13326-022-00277-1}, doi = {10.1186/s13326-022-00277-1}, abstract = {Health data analytics is an area that is facing rapid change due to the acceleration of digitization of the health sector, and the changing landscape of health data and clinical terminology standards. Our research has identified a need for improved tooling to support analytics users in the task of analyzing Fast Healthcare Interoperability Resources (FHIR®) data and associated clinical terminology.}, number = {1}, urldate = {2022-09-12}, journal = {Journal of Biomedical Semantics}, author = {Grimes, John and Szul, Piotr and Metke-Jimenez, Alejandro and Lawley, Michael and Loi, Kylynn}, month = sep, year = {2022}, keywords = {Clinical terminology, Data analytics, FHIR, FHIRPath, Interoperability, SNOMED CT}, pages = {23}, } @article{vorisek_fast_2022, title = {Fast {Healthcare} {Interoperability} {Resources} ({FHIR}) for {Interoperability} in {Health} {Research}: {Systematic} {Review}}, volume = {10}, copyright = {This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published JMIR Medical Informatics, is properly 
cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.}, shorttitle = {Fast {Healthcare} {Interoperability} {Resources} ({FHIR}) for {Interoperability} in {Health} {Research}}, url = {https://medinform.jmir.org/2022/7/e35724}, doi = {10.2196/35724}, abstract = {Background: The standard Fast Healthcare Interoperability Resources (FHIR) is widely used in health information technology. However, its use as a standard for health research is still less prevalent. To use existing data sources more efficiently for health research, data interoperability becomes increasingly important. FHIR provides solutions by offering resource domains such as “Public Health \& Research” and “Evidence-Based Medicine” while using already established web technologies. Therefore, FHIR could help standardize data across different data sources and improve interoperability in health research. Objective: The aim of our study was to provide a systematic review of existing literature and determine the current state of FHIR implementations in health research and possible future directions. Methods: We searched the PubMed/MEDLINE, Embase, Web of Science, IEEE Xplore, and Cochrane Library databases for studies published from 2011 to 2022. Studies investigating the use of FHIR in health research were included. Articles published before 2011, abstracts, reviews, editorials, and expert opinions were excluded. We followed the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines and registered this study with PROSPERO (CRD42021235393). Data synthesis was done in tables and figures. Results: We identified a total of 998 studies, of which 49 studies were eligible for inclusion. 
Of the 49 studies, most (73\%, n=36) covered the domain of clinical research, whereas the remaining studies focused on public health or epidemiology (6\%, n=3) or did not specify their research domain (20\%, n=10). Studies used FHIR for data capture (29\%, n=14), standardization of data (41\%, n=20), analysis (12\%, n=6), recruitment (14\%, n=7), and consent management (4\%, n=2). Most (55\%, 27/49) of the studies had a generic approach, and 55\% (12/22) of the studies focusing on specific medical specialties (infectious disease, genomics, oncology, environmental health, imaging, and pulmonary hypertension) reported their solutions to be conferrable to other use cases. Most (63\%, 31/49) of the studies reported using additional data models or terminologies: Systematized Nomenclature of Medicine Clinical Terms (29\%, n=14), Logical Observation Identifiers Names and Codes (37\%, n=18), International Classification of Diseases 10th Revision (18\%, n=9), Observational Medical Outcomes Partnership common data model (12\%, n=6), and others (43\%, n=21). Only 4 (8\%) studies used a FHIR resource from the domain “Public Health \& Research.” Limitations using FHIR included the possible change in the content of FHIR resources, safety, legal matters, and the need for a FHIR server. Conclusions: Our review found that FHIR can be implemented in health research, and the areas of application are broad and generalizable in most use cases. The implementation of international terminologies was common, and other standards such as the Observational Medical Outcomes Partnership common data model could be used as a complement to FHIR. 
Limitations such as the change of FHIR content, lack of FHIR implementation, safety, and legal matters need to be addressed in future releases to expand the use of FHIR and, therefore, interoperability in health research.}, language = {EN}, number = {7}, urldate = {2022-09-12}, journal = {JMIR Medical Informatics}, author = {Vorisek, Carina Nina and Lehne, Moritz and Klopfenstein, Sophie Anne Ines and Mayer, Paula Josephine and Bartschke, Alexander and Haese, Thomas and Thun, Sylvia}, month = jul, year = {2022}, note = {Company: JMIR Medical Informatics Distributor: JMIR Medical Informatics Institution: JMIR Medical Informatics Label: JMIR Medical Informatics Publisher: JMIR Publications Inc., Toronto, Canada}, pages = {e35724}, } @article{lawson_data_2021, title = {The {Data} {Use} {Ontology} to streamline responsible access to human biomedical datasets}, volume = {1}, issn = {2666-979X}, url = {https://www.sciencedirect.com/science/article/pii/S2666979X21000355}, doi = {10.1016/j.xgen.2021.100028}, abstract = {Human biomedical datasets that are critical for research and clinical studies to benefit human health also often contain sensitive or potentially identifying information of individual participants. Thus, care must be taken when they are processed and made available to comply with ethical and regulatory frameworks and informed consent data conditions. To enable and streamline data access for these biomedical datasets, the Global Alliance for Genomics and Health (GA4GH) Data Use and Researcher Identities (DURI) work stream developed and approved the Data Use Ontology (DUO) standard. DUO is a hierarchical vocabulary of human and machine-readable data use terms that consistently and unambiguously represents a dataset’s allowable data uses. DUO has been implemented by major international stakeholders such as the Broad and Sanger Institutes and is currently used in annotation of over 200,000 datasets worldwide. 
Using DUO in data management and access facilitates researchers’ discovery and access of relevant datasets. DUO annotations increase the FAIRness of datasets and support data linkages using common data use profiles when integrating the data for secondary analyses. DUO is implemented in the Web Ontology Language (OWL) and, to increase community awareness and engagement, hosted in an open, centralized GitHub repository. DUO, together with the GA4GH Passport standard, offers a new, efficient, and streamlined data authorization and access framework that has enabled increased sharing of biomedical datasets worldwide.}, language = {en}, number = {2}, urldate = {2022-09-12}, journal = {Cell Genomics}, author = {Lawson, Jonathan and Cabili, Moran N. and Kerry, Giselle and Boughtwood, Tiffany and Thorogood, Adrian and Alper, Pinar and Bowers, Sarion R. and Boyles, Rebecca R. and Brookes, Anthony J. and Brush, Matthew and Burdett, Tony and Clissold, Hayley and Donnelly, Stacey and Dyke, Stephanie O. M. and Freeberg, Mallory A. and Haendel, Melissa A. and Hata, Chihiro and Holub, Petr and Jeanson, Francis and Jene, Aina and Kawashima, Minae and Kawashima, Shuichi and Konopko, Melissa and Kyomugisha, Irene and Li, Haoyuan and Linden, Mikael and Rodriguez, Laura Lyman and Morita, Mizuki and Mulder, Nicola and Muller, Jean and Nagaie, Satoshi and Nasir, Jamal and Ogishima, Soichi and Ota Wang, Vivian and Paglione, Laura D. and Pandya, Ravi N. and Parkinson, Helen and Philippakis, Anthony A. and Prasser, Fabian and Rambla, Jordi and Reinold, Kathy and Rushton, Gregory A. and Saltzman, Andrea and Saunders, Gary and Sofia, Heidi J. and Spalding, John D. and Swertz, Morris A. and Tulchinsky, Ilia and van Enckevort, Esther J. and Varma, Susheel and Voisin, Craig and Yamamoto, Natsuko and Yamasaki, Chisato and Zass, Lyndon and Guidry Auvil, Jaime M. and Nyrönen, Tommi H. 
and Courtot, Mélanie}, month = nov, year = {2021}, keywords = {FAIR, GA4GH, automated data access, consent, controlled access, data access, data restrictions, ontology, secondary data use, standard}, pages = {100028}, } @article{voisin_ga4gh_2021, title = {{GA4GH} {Passport} standard for digital identity and access permissions}, volume = {1}, issn = {2666-979X}, url = {https://www.sciencedirect.com/science/article/pii/S2666979X21000379}, doi = {10.1016/j.xgen.2021.100030}, abstract = {The Global Alliance for Genomics and Health (GA4GH) supports international standards that enable a federated data sharing model for the research community while respecting data security, ethical and regulatory frameworks, and data authorization and access processes for sensitive data. The GA4GH Passport standard (Passport) defines a machine-readable digital identity that conveys roles and data access permissions (called “visas”) for individual users. Visas are issued by data stewards, including data access committees (DACs) working with public databases, the entities responsible for the quality, integrity, and access arrangements for the datasets in the management of human biomedical data. Passports streamline management of data access rights across data systems by using visas that present a data user’s digital identity and permissions across organizations, tools, environments, and services. We describe real-world implementations of the GA4GH Passport standard in use cases from ELIXIR Europe, National Institutes of Health, and the Autism Sharing Initiative. These implementations demonstrate that the Passport standard has provided transparent mechanisms for establishing permissions and authorizing data access across platforms.}, language = {en}, number = {2}, urldate = {2022-09-12}, journal = {Cell Genomics}, author = {Voisin, Craig and Linden, Mikael and Dyke, Stephanie O. M. and Bowers, Sarion R. and Alper, Pinar and Barkley, Maxmillian P. 
and Bernick, David and Chao, Jianpeng and Courtot, Mélanie and Jeanson, Francis and Konopko, Melissa A. and Kuba, Martin and Lawson, Jonathan and Leinonen, Jaakko and Li, Stephanie and Ota Wang, Vivian and Philippakis, Anthony A. and Reinold, Kathy and Rushton, Gregory A. and Spalding, J. Dylan and Törnroos, Juha and Tulchinsky, Ilya and Guidry Auvil, Jaime M. and Nyrönen, Tommi H.}, month = nov, year = {2021}, keywords = {access, authorization, data, federation, genomics, identity, infrastructure, regulation, security, standard}, pages = {100030}, } @article{wagner_ga4gh_2021, title = {The {GA4GH} {Variation} {Representation} {Specification}: {A} computational framework for variation representation and federated identification}, volume = {1}, issn = {2666-979X}, shorttitle = {The {GA4GH} {Variation} {Representation} {Specification}}, url = {https://www.sciencedirect.com/science/article/pii/S2666979X21000343}, doi = {10.1016/j.xgen.2021.100027}, abstract = {Maximizing the personal, public, research, and clinical value of genomic information will require the reliable exchange of genetic variation data. We report here the Variation Representation Specification (VRS, pronounced “verse”), an extensible framework for the computable representation of variation that complements contemporary human-readable and flat file standards for genomic variation representation. VRS provides semantically precise representations of variation and leverages this design to enable federated identification of biomolecular variation with globally consistent and unique computed identifiers. The VRS framework includes a terminology and information model, machine-readable schema, data sharing conventions, and a reference implementation, each of which is intended to be broadly useful and freely available for community use. 
VRS was developed by a partnership among national information resource providers, public initiatives, and diagnostic testing laboratories under the auspices of the Global Alliance for Genomics and Health (GA4GH).}, language = {en}, number = {2}, urldate = {2022-09-12}, journal = {Cell Genomics}, author = {Wagner, Alex H. and Babb, Lawrence and Alterovitz, Gil and Baudis, Michael and Brush, Matthew and Cameron, Daniel L. and Cline, Melissa and Griffith, Malachi and Griffith, Obi L. and Hunt, Sarah E. and Kreda, David and Lee, Jennifer M. and Li, Stephanie and Lopez, Javier and Moyer, Eric and Nelson, Tristan and Patel, Ronak Y. and Riehle, Kevin and Robinson, Peter N. and Rynearson, Shawn and Schuilenburg, Helen and Tsukanov, Kirill and Walsh, Brian and Konopko, Melissa and Rehm, Heidi L. and Yates, Andrew D. and Freimuth, Robert R. and Hart, Reece K.}, month = nov, year = {2021}, keywords = {GA4GH, VOCA, computed identifiers, data exchange, federated identification, genomics, standard, value objects, variant, variation}, pages = {100027}, } @article{perez-riverol_discovering_2017, title = {Discovering and linking public omics data sets using the {Omics} {Discovery} {Index}}, volume = {35}, copyright = {2017 Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, issn = {1546-1696}, url = {https://www.nature.com/articles/nbt.3790}, doi = {10.1038/nbt.3790}, language = {en}, number = {5}, urldate = {2022-09-12}, journal = {Nature Biotechnology}, author = {Perez-Riverol, Yasset and Bai, Mingze and da Veiga Leprevost, Felipe and Squizzato, Silvano and Park, Young Mi and Haug, Kenneth and Carroll, Adam J. and Spalding, Dylan and Paschall, Justin and Wang, Mingxun and del-Toro, Noemi and Ternent, Tobias and Zhang, Peng and Buso, Nicola and Bandeira, Nuno and Deutsch, Eric W. and Campbell, David S. and Beavis, Ronald C. and Salek, Reza M. 
and Sarkans, Ugis and Petryszak, Robert and Keays, Maria and Fahy, Eoin and Sud, Manish and Subramaniam, Shankar and Barbera, Ariana and Jiménez, Rafael C. and Nesvizhskii, Alexey I. and Sansone, Susanna-Assunta and Steinbeck, Christoph and Lopez, Rodrigo and Vizcaíno, Juan A. and Ping, Peipei and Hermjakob, Henning}, month = may, year = {2017}, note = {Number: 5 Publisher: Nature Publishing Group}, keywords = {Computational platforms and environments, Data integration, Data mining, Data publication and archiving}, pages = {406--409}, } @misc{davis_geoquery_2022, title = {{GEOquery}: {Get} data from {NCBI} {Gene} {Expression} {Omnibus} ({GEO})}, copyright = {MIT}, shorttitle = {{GEOquery}}, url = {https://bioconductor.org/packages/GEOquery/}, abstract = {The NCBI Gene Expression Omnibus (GEO) is a public repository of microarray data. Given the rich and varied nature of this resource, it is only natural to want to apply BioConductor tools to these data. GEOquery is the bridge between GEO and BioConductor.}, urldate = {2022-09-09}, publisher = {Bioconductor version: Release (3.15)}, author = {Davis, Sean}, year = {2022}, doi = {10.18129/B9.bioc.GEOquery}, keywords = {DataImport, Microarray, OneChannel, SAGE, Software, TwoChannel}, } @article{kauffmann_importing_2009, title = {Importing {ArrayExpress} datasets into {R}/{Bioconductor}}, volume = {25}, issn = {1367-4803}, url = {https://doi.org/10.1093/bioinformatics/btp354}, doi = {10.1093/bioinformatics/btp354}, abstract = {Summary: ArrayExpress is one of the largest public repositories of microarray datasets. R/Bioconductor provides a comprehensive suite of microarray analysis and integrative bioinformatics software. However, easy ways for importing datasets from ArrayExpress into R/Bioconductor have been lacking. Here, we present such a tool that is suitable for both interactive and automated use. Availability: The ArrayExpress package is available from the Bioconductor project at http://www.bioconductor.org. 
A users guide and examples are provided with the package. Contact: audrey@ebi.ac.uk Supplementary information: Supplementary data are available at Bioinformatics online.}, number = {16}, urldate = {2022-09-09}, journal = {Bioinformatics}, author = {Kauffmann, Audrey and Rayner, Tim F. and Parkinson, Helen and Kapushesky, Misha and Lukk, Margus and Brazma, Alvis and Huber, Wolfgang}, month = aug, year = {2009}, pages = {2092--2094}, } @misc{kauffmann_arrayexpress_2022, title = {{ArrayExpress}: {Access} the {ArrayExpress} {Microarray} {Database} at {EBI} and build {Bioconductor} data structures: {ExpressionSet}, {AffyBatch}, {NChannelSet}}, copyright = {Artistic-2.0}, shorttitle = {{ArrayExpress}}, url = {https://bioconductor.org/packages/ArrayExpress/}, abstract = {Access the ArrayExpress Repository at EBI and build Bioconductor data structures: ExpressionSet, AffyBatch, NChannelSet}, urldate = {2022-09-09}, publisher = {Bioconductor version: Release (3.15)}, author = {Kauffmann, Audrey and Emam, Ibrahim and Schubert, Michael}, year = {2022}, doi = {10.18129/B9.bioc.ArrayExpress}, keywords = {DataImport, Microarray, OneChannel, Software, TwoChannel}, } @article{rayner_simple_2006, title = {A simple spreadsheet-based, {MIAME}-supportive format for microarray data: {MAGE}-{TAB}}, volume = {7}, issn = {1471-2105}, shorttitle = {A simple spreadsheet-based, {MIAME}-supportive format for microarray data}, url = {https://doi.org/10.1186/1471-2105-7-489}, doi = {10.1186/1471-2105-7-489}, abstract = {Sharing of microarray data within the research community has been greatly facilitated by the development of the disclosure and communication standards MIAME and MAGE-ML by the MGED Society. However, the complexity of the MAGE-ML format has made its use impractical for laboratories lacking dedicated bioinformatics support.}, number = {1}, urldate = {2022-09-09}, journal = {BMC Bioinformatics}, author = {Rayner, Tim F. and Rocca-Serra, Philippe and Spellman, Paul T. and Causton, Helen C. 
and Farne, Anna and Holloway, Ele and Irizarry, Rafael A. and Liu, Junmin and Maier, Donald S. and Miller, Michael and Petersen, Kjell and Quackenbush, John and Sherlock, Gavin and Stoeckert, Christian J. and White, Joseph and Whetzel, Patricia L. and Wymore, Farrell and Parkinson, Helen and Sarkans, Ugis and Ball, Catherine A. and Brazma, Alvis}, month = nov, year = {2006}, keywords = {Directed Acyclic Graph, Experimental Factor, Investigation Design, Microarray Gene Expression Data, Microarray Investigation}, pages = {489}, } @article{athar_arrayexpress_2019, title = {{ArrayExpress} update – from bulk to single-cell expression data}, volume = {47}, issn = {0305-1048}, url = {https://doi.org/10.1093/nar/gky964}, doi = {10.1093/nar/gky964}, abstract = {ArrayExpress (https://www.ebi.ac.uk/arrayexpress) is an archive of functional genomics data from a variety of technologies assaying functional modalities of a genome, such as gene expression or promoter occupancy. The number of experiments based on sequencing technologies, in particular RNA-seq experiments, has been increasing over the last few years and submissions of sequencing data have overtaken microarray experiments in the last 12 months. Additionally, there is a significant increase in experiments investigating single cells, rather than bulk samples, known as single-cell RNA-seq. To accommodate these trends, we have substantially changed our submission tool Annotare which, along with raw and processed data, collects all metadata necessary to interpret these experiments. Selected datasets are re-processed and loaded into our sister resource, the value-added Expression Atlas (and its component Single Cell Expression Atlas), which not only enables users to interpret the data easily but also serves as a test for data quality. 
With an increasing number of studies that combine different assay modalities (multi-omics experiments), a new more general archival resource the BioStudies Database has been developed, which will eventually supersede ArrayExpress. Data submissions will continue unchanged; all existing ArrayExpress data will be incorporated into BioStudies and the existing accession numbers and application programming interfaces will be maintained.},
  number = {D1},
  urldate = {2022-09-09},
  journal = {Nucleic Acids Research},
  author = {Athar, Awais and Füllgrabe, Anja and George, Nancy and Iqbal, Haider and Huerta, Laura and Ali, Ahmed and Snow, Catherine and Fonseca, Nuno A and Petryszak, Robert and Papatheodorou, Irene and Sarkans, Ugis and Brazma, Alvis},
  month = jan,
  year = {2019},
  pages = {D711--D715},
}

@article{wall_genomeasia_2019,
  title = {The {GenomeAsia} {100K} {Project} enables genetic discoveries across {Asia}},
  volume = {576},
  copyright = {2019 The Author(s)},
  issn = {1476-4687},
  url = {https://www.nature.com/articles/s41586-019-1793-z},
  doi = {10.1038/s41586-019-1793-z},
  abstract = {The underrepresentation of non-Europeans in human genetic studies so far has limited the diversity of individuals in genomic datasets and led to reduced medical relevance for a large proportion of the world’s population. Population-specific reference genome datasets as well as genome-wide association studies in diverse populations are needed to address this issue. Here we describe the pilot phase of the GenomeAsia 100K Project. This includes a whole-genome sequencing reference dataset from 1,739 individuals of 219 population groups and 64 countries across Asia. We catalogue genetic variation, population structure, disease associations and founder effects. We also explore the use of this dataset in imputation, to facilitate genetic studies in populations across Asia and worldwide.},
  language = {en},
  number = {7785},
  urldate = {2022-09-06},
  journal = {Nature},
  author = {Wall, Jeffrey D.
    and Stawiski, Eric W. and Ratan, Aakrosh and Kim, Hie Lim and Kim, Changhoon and Gupta, Ravi and Suryamohan, Kushal and Gusareva, Elena S. and Purbojati, Rikky Wenang and Bhangale, Tushar and Stepanov, Vadim and Kharkov, Vladimir and Schröder, Markus S. and Ramprasad, Vedam and Tom, Jennifer and Durinck, Steffen and Bei, Qixin and Li, Jiani and Guillory, Joseph and Phalke, Sameer and Basu, Analabha and Stinson, Jeremy and Nair, Sandhya and Malaichamy, Sivasankar and Biswas, Nidhan K. and Chambers, John C. and Cheng, Keith C. and George, Joyner T. and Khor, Seik Soon and Kim, Jong-Il and Cho, Belong and Menon, Ramesh and Sattibabu, Thiramsetti and Bassi, Akshi and Deshmukh, Manjari and Verma, Anjali and Gopalan, Vivek and Shin, Jong-Yeon and Pratapneni, Mahesh and Santhosh, Sam and Tokunaga, Katsushi and Md-Zain, Badrul M. and Chan, Kok Gan and Parani, Madasamy and Natarajan, Purushothaman and Hauser, Michael and Allingham, R. Rand and Santiago-Turla, Cecilia and Ghosh, Arkasubhra and Gadde, Santosh Gopi Krishna and Fuchsberger, Christian and Forer, Lukas and Schoenherr, Sebastian and Sudoyo, Herawati and Lansing, J. Stephen and Friedlaender, Jonathan and Koki, George and Cox, Murray P. and Hammer, Michael and Karafet, Tatiana and Ang, Khai C. and Mehdi, Syed Q. and Radha, Venkatesan and Mohan, Viswanathan and Majumder, Partha P. and Seshagiri, Somasekar and Seo, Jeong-Sun and Schuster, Stephan C. and Peterson, Andrew S.
    and {GenomeAsia100K Consortium}},
  month = dec,
  year = {2019},
  publisher = {Nature Publishing Group},
  keywords = {Genetic variation},
  pages = {106--111},
}

@article{prictor_australian_2020,
  title = {Australian {Aboriginal} and {Torres} {Strait} {Islander} {Collections} of {Genetic} {Heritage}: {The} {Legal}, {Ethical} and {Practical} {Considerations} of a {Dynamic} {Consent} {Approach} to {Decision} {Making}},
  volume = {48},
  issn = {1073-1105, 1748-720X},
  shorttitle = {Australian {Aboriginal} and {Torres} {Strait} {Islander} {Collections} of {Genetic} {Heritage}},
  url = {https://www.cambridge.org/core/journals/journal-of-law-medicine-and-ethics/article/australian-aboriginal-and-torres-strait-islander-collections-of-genetic-heritage-the-legal-ethical-and-practical-considerations-of-a-dynamic-consent-approach-to-decision-making/B094693EE78CA8DBB725E01B12CCB5F0#},
  doi = {10.1177/1073110520917012},
  abstract = {Dynamic Consent (DC) is both a model and a specific web-based tool that enables clear, granular communication and recording of participant consent choices over time. The DC model enables individuals to know and to decide how personal research information is being used and provides a way in which to exercise legal rights provided in privacy and data protection law. The DC tool is flexible and responsive, enabling legal and ethical requirements in research data sharing to be met and for online health information to be maintained. DC has been used in rare diseases and genomics, to enable people to control and express their preferences regarding their own data.
However, DC has never been explored in relationship to historical collections of bioscientific and genetic heritage or to contexts involving Aboriginal and Torres Strait Islander people (First Peoples of Australia). In response to the growing interest by First Peoples throughout Australia in genetic and genomic research, and the increasing number of invitations from researchers to participate in community health and wellbeing projects, this article examines the legal and ethical attributes and challenges of DC in these contexts. It also explores opportunities for including First Peoples' cultural perspectives, governance, and leadership as a method for defining (or redefining) DC on cultural terms that engage best practice research and data analysis as well as respect for meaningful and longitudinal individual and family participation.},
  language = {en},
  number = {1},
  urldate = {2022-09-05},
  journal = {Journal of Law, Medicine \& Ethics},
  author = {Prictor, Megan and Huebner, Sharon and Teare, Harriet J. A.
    and Burchill, Luke and Kaye, Jane},
  year = {2020},
  publisher = {Cambridge University Press},
  pages = {205--217},
}

@article{nielsen_returning_2022,
  title = {Returning raw genomic data: rights of research participants and obligations of health care professionals},
  volume = {216},
  issn = {1326-5377},
  shorttitle = {Returning raw genomic data},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.5694/mja2.51546},
  doi = {10.5694/mja2.51546},
  language = {en},
  number = {11},
  urldate = {2022-09-05},
  journal = {Medical Journal of Australia},
  author = {Nielsen, Jane L and Johnston, Carolyn and O'Brien, Tracey and Tyrrell, Vanessa J},
  year = {2022},
  note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.5694/mja2.51546},
  keywords = {Clinical decision-making, Delivery of healthcare, Ethics, Genetic testing, Legislation, Medical records, Medicolegal, Patient rights, Sequence analysis, medical},
  pages = {550--552},
}

@article{jowett_genomic_2020,
  title = {Genomic {Research} and {Data}-{Sharing}: {Time} to {Revisit} {Australian} {Laws}?},
  volume = {39},
  copyright = {free\_to\_read},
  issn = {0083-4041},
  shorttitle = {Genomic {Research} and {Data}-{Sharing}},
  url = {https://eprints.qut.edu.au/203714/},
  abstract = {This article analyses the ethical and legal aspects of data-sharing and genomic research. It begins in Part II with an overview of the nature of genomic information, and whether it is exceptional when compared to other forms of health information. Part III considers the role of data-sharing in genomic research, with the importance of public trust in supporting genomic research considered in Part IV. The Australian regulatory framework for data-sharing in genomic research is considered in Parts V and VI, with reform options discussed in Part VII.
The article concludes that advances in genomic research and the complexity of the current regulatory framework make it timely to review Australian laws to ensure that they maintain their relevance for this rapidly developing field of research.},
  language = {en},
  number = {2},
  urldate = {2022-09-05},
  journal = {The University of Queensland Law Journal},
  author = {Jowett, Stephanie and Dallaston, Elizabeth and Bennett, Belinda},
  month = aug,
  year = {2020},
  publisher = {The University of Queensland},
  keywords = {data, law, legal, privacy, regulation},
  pages = {341--369},
}

@techreport{ingena_genomic_2021,
  title = {Genomic {Data} in {Australia}: {An} industry perspective on clarity, certainty and standardisation},
  abstract = {The collection, management and exchange of genomic data into healthcare delivery is critical to supporting better person-centred care, driving value to the health system and to building a thriving research ecosystem. The delivery of mature genomics information management which supports the integration of genomics into mainstream health care requires a comprehensive and collaborative approach. The genomics industry has an important role in delivering this maturity and ensuring that value is derived for all stakeholders. With the establishment of the Industry Genomics Network Alliance (InGeNA) in 2020, it is now appropriate to examine industry’s role in leveraging the value of genomic data to support better health outcomes for Australians. Engagement with stakeholders from industry, healthcare, research and consumers indicated three consistent themes, being a need for clarity in how the sector communicates, certainty about regulatory and compliance matters, and the importance of standards adoption to support the application of genomics to realise the value for people, the healthcare sector and the research community.
As a united voice for the genomics industry, InGeNA has undertaken a review of the Australian genomic data landscape to identify the challenges and opportunities evidence regarding the management, governance, sharing and use of genomic data to support better outcomes for all Australians. This report then considers the role that InGeNA can play in supporting the delivery of this potential. By establishing shared positions regarding how genomic data is shared, managed and used in Australia, InGeNA can engage with governments at all levels, and with professional bodies, consumer groups, research groups and the broader healthcare sector to support and encourage the appropriate use of genomic data in Australia without the perception of self-interest that any one organisation may attract. Indeed, the very diversity of interests among the InGeNA membership ensures that a balanced and positive position can be found across a range of topics. This report is intended primarily as an input to planning for InGeNA. However, it should also serve as a useful guide to the value industry can bring to public health, research and the broader sector.},
  institution = {Industry Genomics Network Alliance (InGeNA)},
  author = {{InGeNA}},
  year = {2021},
}

@article{vidgen_sharing_2020,
  title = {Sharing genomic data from clinical testing with researchers: public survey of expectations of clinical genomic data management in {Queensland}, {Australia}},
  volume = {21},
  issn = {1472-6939},
  shorttitle = {Sharing genomic data from clinical testing with researchers},
  url = {https://doi.org/10.1186/s12910-020-00563-6},
  doi = {10.1186/s12910-020-00563-6},
  abstract = {There has been considerable investment and strategic planning to introduce genomic testing into Australia’s public health system. As more patients’ genomic data is being held by the public health system, there will be increased requests from researchers to access this data.
It is important that public policy reflects public expectations for how genomic data that is generated from clinical tests is used. To inform public policy and discussions around genomic data sharing, we sought public opinions on using genomic data contained in medical records for research purposes in the Australian state of Queensland.},
  number = {1},
  urldate = {2022-09-05},
  journal = {BMC Medical Ethics},
  author = {Vidgen, Miranda E. and Kaladharan, Sid and Malacova, Eva and Hurst, Cameron and Waddell, Nicola},
  month = nov,
  year = {2020},
  keywords = {Data linkage, Data sharing, Genomic, Health information, Public views, Secondary use},
  pages = {119},
}

@article{sansone_fairsharing_2019,
  title = {{FAIRsharing} as a community approach to standards, repositories and policies},
  volume = {37},
  copyright = {2019 The Author(s)},
  issn = {1546-1696},
  url = {https://www.nature.com/articles/s41587-019-0080-8},
  doi = {10.1038/s41587-019-0080-8},
  language = {en},
  number = {4},
  urldate = {2022-09-01},
  journal = {Nature Biotechnology},
  author = {Sansone, Susanna-Assunta and McQuilton, Peter and Rocca-Serra, Philippe and Gonzalez-Beltran, Alejandra and Izzo, Massimiliano and Lister, Allyson L. and Thurston, Milo},
  month = apr,
  year = {2019},
  publisher = {Nature Publishing Group},
  keywords = {Data publication and archiving, Education, Research data, Research management, Standards},
  pages = {358--367},
}

@techreport{cannon_repository_2021,
  title = {Repository {Features} to {Help} {Researchers}: {An} invitation to a dialogue},
  shorttitle = {Repository {Features} to {Help} {Researchers}},
  url = {https://zenodo.org/record/4683794},
  abstract = {A group of publishers came together to discuss how we could reduce the complexity and inconsistency provided in publisher's advice to researchers when selecting an appropriate data repository.
It is a shared goal among publishers and other stakeholders to increase repository use – which remains far from optimal – and we assume that helping researchers find a suitable repository more easily will help achieve this. To address this a list of features has been created and it is intended only as a framework within which publishers can make recommendations to researchers, not as a way to restrict which repositories researchers may choose for their data. Our intention is that the features we highlight will act to initiate engagement and collaboration among publishers, repositories and the RPOs, government and funders that ultimately make the policies around Open Research. As we start this conversation, it is important that we act together with other stakeholders to raise awareness of the challenges involved around FAIR data and to prevent any perverse consequences. From the RDA FAIRsharing WG point of view, the ultimate objective is to map repository features across all existing initiatives, and to identify a common core set of metadata fields that all stakeholders want to see in registry of repositories. The FAIRsharing registry in particular is agnostic as to the selection process of standards, repositories and policies, as part of its commitment to working with and for all stakeholder groups.},
  language = {eng},
  urldate = {2022-08-31},
  institution = {Zenodo},
  author = {Cannon, Matthew and Graf, Chris and McNeice, Kiera and Chan, Wei Mun and Callaghan, Sarah and Carnevale, Ilaria and Cranston, Imogen and Edmunds, Scott C. and Everitt, Nicholas and Ganley, Emma and Hrynaszkiewicz, Iain and Khodiyar, Varsha K. and Leary, Adam and Lemberger, Thomas and MacCallum, Catriona J.
    and Murray, Hollydawn and Sharples, Kathryn and Soares E Silva, Marina and Wright, Guillaume and McQuilton, Peter and Sansone, Susanna-Assunta},
  month = apr,
  year = {2021},
  doi = {10.5281/zenodo.4683794},
  keywords = {Core Trust Seal (CTS), Criteria, Data policy, Data sharing, FAIR Principles, FAIRsharing, Force11, Publishers, Repositories, Reproducibility, Research Data Alliance (RDA), Standardisation, TRUST},
}

@article{hudson_rights_2020,
  title = {Rights, interests and expectations: {Indigenous} perspectives on unrestricted access to genomic data},
  volume = {21},
  copyright = {2020 Springer Nature Limited},
  issn = {1471-0064},
  shorttitle = {Rights, interests and expectations},
  url = {https://www.nature.com/articles/s41576-020-0228-x},
  doi = {10.1038/s41576-020-0228-x},
  abstract = {Addressing Indigenous rights and interests in genetic resources has become increasingly challenging in an open science environment that promotes unrestricted access to genomic data. Although Indigenous experiences with genetic research have been shaped by a series of negative interactions, there is increasing recognition that equitable benefits can only be realized through greater participation of Indigenous communities. Issues of trust, accountability and equity underpin Indigenous critiques of genetic research and the sharing of genomic data. This Perspectives article highlights identified issues for Indigenous communities around the sharing of genomic data and suggests principles and actions that genomic researchers can adopt to recognize community rights and interests in data.},
  language = {en},
  number = {6},
  urldate = {2022-08-31},
  journal = {Nature Reviews Genetics},
  author = {Hudson, Maui and Garrison, Nanibaa’ A. and Sterling, Rogena and Caron, Nadine R. and Fox, Keolu and Yracheta, Joseph and Anderson, Jane and Wilcox, Phil and Arbour, Laura and Brown, Alex and Taualii, Maile and Kukutai, Tahu and Haring, Rodney and Te Aika, Ben and Baynam, Gareth S.
and Dearden, Peter K. and Chagné, David and Malhi, Ripan S. and Garba, Ibrahim and Tiffin, Nicki and Bolnick, Deborah and Stott, Matthew and Rolleston, Anna K. and Ballantyne, Leah L. and Lovett, Ray and David-Chavez, Dominique and Martinez, Andrew and Sporle, Andrew and Walter, Maggie and Reading, Jeff and Carroll, Stephanie Russo},
  month = jun,
  year = {2020},
  publisher = {Nature Publishing Group},
  keywords = {Ethics, Genetic databases, Genome-wide association studies, Law and regulation, Population genetics},
  pages = {377--384},
}

@article{mukherjee_genomes_2021,
  title = {Genomes {OnLine} {Database} ({GOLD}) v.8: overview and updates},
  volume = {49},
  issn = {1362-4962},
  shorttitle = {Genomes {OnLine} {Database} ({GOLD}) v.8},
  url = {https://europepmc.org/articles/PMC7778979},
  doi = {10.1093/nar/gkaa983},
  abstract = {The Genomes OnLine Database (GOLD) (https://gold.jgi.doe.gov/) is a manually curated, daily updated collection of genome projects and their metadata accumulated from around the world. The current version of the database includes over 1.17 million entries organized broadly into Studies (45 770), Organisms (387 382) or Biosamples (101 207), Sequencing Projects (355 364) and Analysis Projects (283 481). These four levels contain over 600 metadata fields, which includes 76 controlled vocabulary (CV) tables containing 3873 terms. GOLD provides an interactive web user interface for browsing and searching by a wide range of project and metadata fields. Users can enter details about their own projects in GOLD, which acts as a gatekeeper to ensure that metadata is accurately documented before submitting sequence information to the Integrated Microbial Genomes (IMG) system for analysis. In order to maintain a reference dataset for use by members of the scientific community, GOLD also imports projects from public repositories such as GenBank and SRA.
The current status of the database, along with recent updates and improvements are described in this manuscript.},
  language = {eng},
  number = {D1},
  urldate = {2022-08-31},
  journal = {Nucleic Acids Research},
  author = {Mukherjee, Supratim and Stamatis, Dimitri and Bertsch, Jon and Ovchinnikova, Galina and Sundaramurthi, Jagadish Chandrabose and Lee, Janey and Kandimalla, Mahathi and Chen, I-Min A and Kyrpides, Nikos C and Reddy, T B K},
  month = jan,
  year = {2021},
  pmid = {33152092},
  pmcid = {PMC7778979},
  pages = {D723--D733},
}

@article{brambilla_pisoni_apropos_2022,
  title = {Apropos {Data} {Sharing}: {Abandon} the {Distrust} and {Embrace} the {Opportunity}},
  volume = {41},
  issn = {1044-5498},
  shorttitle = {Apropos {Data} {Sharing}},
  url = {https://www.liebertpub.com/doi/10.1089/dna.2021.0501},
  doi = {10.1089/dna.2021.0501},
  abstract = {In this commentary, we focus on the ethical challenges of data sharing and its potential in supporting biomedical research. Taking human genomics (HG) and European governance for sharing genomic data as a case study, we consider how to balance competing rights and interests—balancing protection of the privacy of data subjects and data security, with scientific progress and the need to promote public health. This is of particular relevancy in light of the current pandemic, which stresses the urgent need for international collaborations to promote health for all.
We draw from existing ethical codes for data sharing in HG to offer recommendations as to how to protect rights while fostering scientific research and open science.},
  number = {1},
  urldate = {2022-08-31},
  journal = {DNA and Cell Biology},
  author = {Brambilla Pisoni, Giorgia and Taddeo, Mariarosaria},
  month = jan,
  year = {2022},
  publisher = {Mary Ann Liebert, Inc., publishers},
  keywords = {European Union, data sharing, digital ethics, human genomics, privacy, public health, rights},
  pages = {11--15},
}

@article{rehm_ga4gh_2021,
  title = {{GA4GH}: {International} policies and standards for data sharing across genomic research and healthcare},
  volume = {1},
  issn = {2666-979X},
  shorttitle = {{GA4GH}},
  url = {https://www.sciencedirect.com/science/article/pii/S2666979X21000367},
  doi = {10.1016/j.xgen.2021.100029},
  abstract = {The Global Alliance for Genomics and Health (GA4GH) aims to accelerate biomedical advances by enabling the responsible sharing of clinical and genomic data through both harmonized data aggregation and federated approaches. The decreasing cost of genomic sequencing (along with other genome-wide molecular assays) and increasing evidence of its clinical utility will soon drive the generation of sequence data from tens of millions of humans, with increasing levels of diversity. In this perspective, we present the GA4GH strategies for addressing the major challenges of this data revolution. We describe the GA4GH organization, which is fueled by the development efforts of eight Work Streams and informed by the needs of 24 Driver Projects and other key stakeholders. We present the GA4GH suite of secure, interoperable technical standards and policy frameworks and review the current status of standards, their relevance to key domains of research and clinical care, and future plans of GA4GH.
Broad international participation in building, adopting, and deploying GA4GH standards and frameworks will catalyze an unprecedented effort in data sharing that will be critical to advancing genomic medicine and ensuring that all populations can access its benefits.},
  language = {en},
  number = {2},
  urldate = {2022-08-31},
  journal = {Cell Genomics},
  author = {Rehm, Heidi L. and Page, Angela J. H. and Smith, Lindsay and Adams, Jeremy B. and Alterovitz, Gil and Babb, Lawrence J. and Barkley, Maxmillian P. and Baudis, Michael and Beauvais, Michael J. S. and Beck, Tim and Beckmann, Jacques S. and Beltran, Sergi and Bernick, David and Bernier, Alexander and Bonfield, James K. and Boughtwood, Tiffany F. and Bourque, Guillaume and Bowers, Sarion R. and Brookes, Anthony J. and Brudno, Michael and Brush, Matthew H. and Bujold, David and Burdett, Tony and Buske, Orion J. and Cabili, Moran N. and Cameron, Daniel L. and Carroll, Robert J. and Casas-Silva, Esmeralda and Chakravarty, Debyani and Chaudhari, Bimal P. and Chen, Shu Hui and Cherry, J. Michael and Chung, Justina and Cline, Melissa and Clissold, Hayley L. and Cook-Deegan, Robert M. and Courtot, Mélanie and Cunningham, Fiona and Cupak, Miro and Davies, Robert M. and Denisko, Danielle and Doerr, Megan J. and Dolman, Lena I. and Dove, Edward S. and Dursi, L. Jonathan and Dyke, Stephanie O. M. and Eddy, James A. and Eilbeck, Karen and Ellrott, Kyle P. and Fairley, Susan and Fakhro, Khalid A. and Firth, Helen V. and Fitzsimons, Michael S. and Fiume, Marc and Flicek, Paul and Fore, Ian M. and Freeberg, Mallory A. and Freimuth, Robert R. and Fromont, Lauren A. and Fuerth, Jonathan and Gaff, Clara L. and Gan, Weiniu and Ghanaim, Elena M. and Glazer, David and Green, Robert C. and Griffith, Malachi and Griffith, Obi L. and Grossman, Robert L. and Groza, Tudor and Guidry Auvil, Jaime M. and Guigó, Roderic and Gupta, Dipayan and Haendel, Melissa A. and Hamosh, Ada and Hansen, David P. and Hart, Reece K.
    and Hartley, Dean Mitchell and Haussler, David and Hendricks-Sturrup, Rachele M. and Ho, Calvin W. L. and Hobb, Ashley E. and Hoffman, Michael M. and Hofmann, Oliver M. and Holub, Petr and Hsu, Jacob Shujui and Hubaux, Jean-Pierre and Hunt, Sarah E. and Husami, Ammar and Jacobsen, Julius O. and Jamuar, Saumya S. and Janes, Elizabeth L. and Jeanson, Francis and Jené, Aina and Johns, Amber L. and Joly, Yann and Jones, Steven J. M. and Kanitz, Alexander and Kato, Kazuto and Keane, Thomas M. and Kekesi-Lafrance, Kristina and Kelleher, Jerome and Kerry, Giselle and Khor, Seik-Soon and Knoppers, Bartha M. and Konopko, Melissa A. and Kosaki, Kenjiro and Kuba, Martin and Lawson, Jonathan and Leinonen, Rasko and Li, Stephanie and Lin, Michael F. and Linden, Mikael and Liu, Xianglin and Liyanage, Isuru Udara and Lopez, Javier and Lucassen, Anneke M. and Lukowski, Michael and Mann, Alice L. and Marshall, John and Mattioni, Michele and Metke-Jimenez, Alejandro and Middleton, Anna and Milne, Richard J. and Molnár-Gábor, Fruzsina and Mulder, Nicola and Munoz-Torres, Monica C. and Nag, Rishi and Nakagawa, Hidewaki and Nasir, Jamal and Navarro, Arcadi and Nelson, Tristan H. and Niewielska, Ania and Nisselle, Amy and Niu, Jeffrey and Nyrönen, Tommi H. and O’Connor, Brian D. and Oesterle, Sabine and Ogishima, Soichi and Ota Wang, Vivian and Paglione, Laura A. D. and Palumbo, Emilio and Parkinson, Helen E. and Philippakis, Anthony A. and Pizarro, Angel D. and Prlic, Andreas and Rambla, Jordi and Rendon, Augusto and Rider, Renee A. and Robinson, Peter N. and Rodarmer, Kurt W. and Rodriguez, Laura Lyman and Rubin, Alan F. and Rueda, Manuel and Rushton, Gregory A. and Ryan, Rosalyn S. and Saunders, Gary I. and Schuilenburg, Helen and Schwede, Torsten and Scollen, Serena and Senf, Alexander and Sheffield, Nathan C. and Skantharajah, Neerjah and Smith, Albert V. and Sofia, Heidi J. and Spalding, Dylan and Spurdle, Amanda B. and Stark, Zornitza and Stein, Lincoln D.
    and Suematsu, Makoto and Tan, Patrick and Tedds, Jonathan A. and Thomson, Alastair A. and Thorogood, Adrian and Tickle, Timothy L. and Tokunaga, Katsushi and Törnroos, Juha and Torrents, David and Upchurch, Sean and Valencia, Alfonso and Valls Guimera, Roman and Vamathevan, Jessica and Varma, Susheel and Vears, Danya F. and Viner, Coby and Voisin, Craig and Wagner, Alex H. and Wallace, Susan E. and Walsh, Brian P. and Williams, Marc S. and Winkler, Eva C. and Wold, Barbara J. and Wood, Grant M. and Woolley, J. Patrick and Yamasaki, Chisato and Yates, Andrew D. and Yung, Christina K. and Zass, Lyndon J. and Zaytseva, Ksenia and Zhang, Junjun and Goodhand, Peter and North, Kathryn and Birney, Ewan},
  month = nov,
  year = {2021},
  keywords = {bioethics, data access, data federation, data sharing, genomics, learning health system, policy, precision medicine, standards},
  pages = {100029},
}

@article{ladewig_ga4gh_2022,
  title = {{GA4GH} {Phenopackets}: {A} {Practical} {Introduction}},
  issn = {2641-6573},
  shorttitle = {{GA4GH} {Phenopackets}},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/ggn2.202200016},
  doi = {10.1002/ggn2.202200016},
  abstract = {The Global Alliance for Genomics and Health (GA4GH) is developing a suite of coordinated standards for genomics for healthcare. The Phenopacket is a new GA4GH standard for sharing disease and phenotype information that characterizes an individual person, linking that individual to detailed phenotypic descriptions, genetic information, diagnoses, and treatments. A detailed example is presented that illustrates how to use the schema to represent the clinical course of a patient with retinoblastoma, including demographic information, the clinical diagnosis, phenotypic features and clinical measurements, an examination of the extirpated tumor, therapies, and the results of genomic analysis.
    The Phenopacket Schema, together with other GA4GH data and technical standards, will enable data exchange and provide a foundation for the computational analysis of disease and phenotype information to improve our ability to diagnose and conduct research on all types of disorders, including cancer and rare diseases.},
  language = {en},
  urldate = {2022-08-30},
  journal = {Advanced Genetics},
  author = {Ladewig, Markus S. and Jacobsen, Julius O. B. and Wagner, Alex H. and Danis, Daniel and El Kassaby, Baha and Gargano, Michael and Groza, Tudor and Baudis, Michael and Steinhaus, Robin and Seelow, Dominik and Bechrakis, Nikolaos E. and Mungall, Christopher J. and Schofield, Paul N. and Elemento, Olivier and Smith, Lindsay and McMurry, Julie A. and Munoz-Torres, Monica and Haendel, Melissa A. and Robinson, Peter N.},
  year = {2022},
  note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/ggn2.202200016},
  keywords = {FAIR data, Global Alliance for Genomics and Health, Human Phenotype Ontology, Phenopacket Schema, deep phenotyping},
  pages = {2200016},
}

@misc{cellpress_authors_2021,
  title = {Author’s guide: {Standardized} datatypes, datatype specific repositories, and general-purpose repositories recommended by {Cell} {Press}},
  url = {https://www.cell.com/pb-assets/journals/research/cellpress/data/RecommendRepositories-1621989644133.pdf},
  urldate = {2022-08-31},
  author = {{Cell Press}},
  year = {2021},
}

@misc{plos_one_data_2019,
  title = {Data {Availability}},
  url = {https://journals.plos.org/plosone/s/data-availability},
  abstract = {PLOS journals require authors to make all data necessary to replicate their study’s findings publicly available without restriction at the time of publication.
When specific legal or ethical restrictions prohibit public sharing of a data set, authors must indicate how others may obtain access to the data.},
  language = {en},
  urldate = {2022-08-31},
  journal = {Data Availability {\textbar} PLOS ONE},
  author = {{PLOS ONE}},
  year = {2019},
}

@misc{band_me_2019,
  title = {Me vs. the {EGA} part 4: losing again},
  url = {https://gavinband.github.io/bioinformatics/data/2019/05/15/Me_versus_the_European_Genome_Phenome_Archive_part_four.html},
  urldate = {2022-08-29},
  author = {Band, Gavin},
  year = {2019},
}

@misc{band_me_2019-1,
  title = {Me vs. the {EGA} part 2: uploading data},
  url = {https://gavinband.github.io/bioinformatics/data/2019/05/02/Me_versus_the_European_Genome_Phenome_Archive_part_two.html},
  urldate = {2022-08-29},
  author = {Band, Gavin},
  year = {2019},
}

@misc{band_me_2019-2,
  title = {Me vs. the {EGA} part 3: winning},
  url = {https://gavinband.github.io/bioinformatics/data/2019/05/12/Me_versus_the_European_Genome_Phenome_Archive_part_three.html},
  urldate = {2022-08-29},
  author = {Band, Gavin},
  year = {2019},
}

@misc{band_me_2019-3,
  title = {Me vs. {EGA}},
  url = {https://gavinband.github.io/bioinformatics/data/2019/05/01/Me_versus_the_European_Genome_Phenome_Archive.html},
  urldate = {2022-08-29},
  author = {Band, Gavin},
  year = {2019},
}

@article{wang_gsa_2017,
  title = {{GSA}: {Genome} {Sequence} {Archive}},
  volume = {15},
  issn = {1672-0229},
  shorttitle = {{GSA}},
  url = {https://www.sciencedirect.com/science/article/pii/S1672022917300025},
  doi = {10.1016/j.gpb.2017.01.001},
  abstract = {With the rapid development of sequencing technologies towards higher throughput and lower cost, sequence data are generated at an unprecedentedly explosive rate. To provide an efficient and easy-to-use platform for managing huge sequence data, here we present Genome Sequence Archive (GSA; http://bigd.big.ac.cn/gsa or http://gsa.big.ac.cn), a data repository for archiving raw sequence data.
In compliance with data standards and structures of the International Nucleotide Sequence Database Collaboration (INSDC), GSA adopts four data objects (BioProject, BioSample, Experiment, and Run) for data organization, accepts raw sequence reads produced by a variety of sequencing platforms, stores both sequence reads and metadata submitted from all over the world, and makes all these data publicly available to worldwide scientific communities. In the era of big data, GSA is not only an important complement to existing INSDC members by alleviating the increasing burdens of handling sequence data deluge, but also takes the significant responsibility for global big data archive and provides free unrestricted access to all publicly available data in support of research activities throughout the world.},
  language = {en},
  number = {1},
  urldate = {2022-08-24},
  journal = {Genomics, Proteomics \& Bioinformatics},
  author = {Wang, Yanqing and Song, Fuhai and Zhu, Junwei and Zhang, Sisi and Yang, Yadong and Chen, Tingting and Tang, Bixia and Dong, Lili and Ding, Nan and Zhang, Qian and Bai, Zhouxian and Dong, Xunong and Chen, Huanxin and Sun, Mingyuan and Zhai, Shuang and Sun, Yubin and Yu, Lei and Lan, Li and Xiao, Jingfa and Fang, Xiangdong and Lei, Hongxing and Zhang, Zhang and Zhao, Wenming},
  month = feb,
  year = {2017},
  keywords = {Big data, GSA, Genome Sequence Archive, INSDC, Raw sequence data},
  pages = {14--18},
}

@article{arita_international_2020,
  title = {The international nucleotide sequence database collaboration},
  volume = {49},
  issn = {0305-1048},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7778961/},
  doi = {10.1093/nar/gkaa967},
  abstract = {The International Nucleotide Sequence Database Collaboration (INSDC; http://www.insdc.org/) has been the core infrastructure for collecting and providing nucleotide sequence data and metadata for {\textgreater}30 years.
Three partner organizations, the DNA Data Bank of Japan (DDBJ) at the National Institute of Genetics in Mishima, Japan; the European Nucleotide Archive (ENA) at the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI) in Hinxton, UK; and GenBank at National Center for Biotechnology Information (NCBI), National Library of Medicine, National Institutes of Health in Bethesda, Maryland, USA have been collaboratively maintaining the INSDC for the benefit of not only science but all types of community worldwide.}, number = {D1}, urldate = {2022-08-17}, journal = {Nucleic Acids Research}, author = {Arita, Masanori and Karsch-Mizrachi, Ilene and Cochrane, Guy}, month = nov, year = {2020}, pmid = {33166387}, pmcid = {PMC7778961}, keywords = {DDBJ, ENA, GenBank, SRA, archive, insdc, repository}, pages = {D121--D124}, }

% Titles are stored without a trailing period; the bibliography style
% supplies the terminal punctuation.
@article{byrd_responsible_2020,
  title = {Responsible, practical genomic data sharing that accelerates research},
  volume = {21},
  issn = {1471-0056},
  url = {http://www.nature.com/articles/s41576-020-0257-5},
  doi = {10.1038/s41576-020-0257-5},
  abstract = {Data sharing anchors reproducible science, but expectations and best practices are often nebulous. Communities of funders, researchers and publishers continue to grapple with what should be required or encouraged. To illuminate the rationales for sharing data, the technical challenges and the social and cultural challenges, we consider the stakeholders in the scientific enterprise. In biomedical research, participants are key among those stakeholders. Ethical sharing requires considering both the value of research efforts and the privacy costs for participants. We discuss current best practices for various types of genomic data, as well as opportunities to promote ethical data sharing that accelerates science by aligning incentives.},
  number = {10},
  urldate = {2020-07-23},
  journal = {Nature Reviews Genetics},
  author = {Byrd, James Brian and Greene, Anna C and Prasad, Deepashree Venkatesh and Jiang, Xiaoqian and Greene, Casey S},
  month = oct,
  year = {2020},
  keywords = {consent, governance, legal, privacy, sharing},
  pages = {615--629},
}

@article{bull_ethics_2020,
  title = {The ethics of data sharing and biobanking in health research},
  volume = {5},
  url = {http://dx.doi.org/10.12688/wellcomeopenres.16351.1},
  doi = {10.12688/wellcomeopenres.16351.1},
  abstract = {The importance of data sharing and biobanking are increasingly being recognised in global health research. Such practices are perceived to have the potential to promote science by maximising the utility of data and samples. However, they also raise ethical challenges which can be exacerbated by existing disparities in power, infrastructure and capacity. The Global Forum on Bioethics in Research (GFBR) convened in Stellenbosch, South Africa in November 2018, to explore the ethics of data sharing and biobanking in health research. Ninety-five participants from 35 countries drew on case studies and their experiences with sharing in their discussion of issues relating to respecting research participants and communities, promoting equitable sharing, and international and national approaches to governing data sharing and biobanking. In this editorial we will briefly review insights relating to each of these three themes. Copyright: © 2020 Bull S and Bhagwandin N.},
  urldate = {2021-10-19},
  journal = {Wellcome Open Research},
  author = {Bull, Susan and Bhagwandin, Niresh},
  month = nov,
  year = {2020},
  keywords = {culture, ethics, privacy, sharing},
  pages = {270},
}

% Web page with a corporate author: the name is double-braced so BibTeX
% treats it as one indivisible name instead of splitting it at the comma.
% The hosting site is recorded in howpublished, the field misc entries use.
@misc{rights_ocr_hipaa_2008,
  title = {The {HIPAA} {Privacy} {Rule}},
  url = {https://www.hhs.gov/hipaa/for-professionals/privacy/index.html},
  abstract = {The HIPAA Privacy Rule},
  urldate = {2021-10-18},
  howpublished = {HHS.gov},
  author = {{Office for Civil Rights (OCR)}},
  month = may,
  year = {2008},
  keywords = {legal, privacy},
}

@article{eckstein_australia_2018,
  title = {Australia: regulating genomic data sharing to promote public trust},
  volume = {137},
  url = {http://dx.doi.org/10.1007/s00439-018-1914-z},
  doi = {10.1007/s00439-018-1914-z},
  abstract = {The regulation of genomic data sharing in Australia is a confusing mix of common law, legislation, ethical guidelines, and codes of practice. Beyond privacy laws, which only apply to genomic data that meets the definition of personal information, the key regulatory lever is the National Health and Medical Research Council (NHMRC) National Statement for Ethical Conduct in Human Research ("National Statement") (2007). Compliance with the National Statement is a requirement for institutions to apply to the NHMRC for funding, and includes-among other things-requirements for review of most genomic research by Human Research Ethics Committees. The sections of the National Statement specifying requirements for research with human genomic data are currently under review, including proposed new requirements addressing the return of genetic research findings and oversight of transfer agreements. Ensuring the willingness of Australians to donate their genomic information and participate in medical research will require clarification and harmonisation of the applicable regulatory framework, along with reforms to ensure that these regulations reflect the conditions necessary to promote ongoing public trust in researchers and institutions.},
  number = {8},
  urldate = {2021-10-18},
  journal = {Human Genetics},
  author = {Eckstein, Lisa and Chalmers, Donald and Critchley, Christine and Jeanneret, Ruthie and McWhirter, Rebekah and Nielsen, Jane and Otlowski, Margaret and Nicol, Dianne},
  month = aug,
  year = {2018},
  keywords = {Privacy Act, legal, privacy},
  pages = {583--591},
}

% Supplement issue: the volume number and the supplement designation are
% kept in separate fields (volume / number).
@article{tucker_protecting_2016,
  title = {Protecting patient privacy when sharing patient-level data from clinical trials},
  volume = {16},
  number = {Suppl 1},
  url = {http://dx.doi.org/10.1186/s12874-016-0169-4},
  doi = {10.1186/s12874-016-0169-4},
  abstract = {BACKGROUND: Greater transparency and, in particular, sharing of patient-level data for further scientific research is an increasingly important topic for the pharmaceutical industry and other organisations who sponsor and conduct clinical trials as well as generally in the interests of patients participating in studies. A concern remains, however, over how to appropriately prepare and share clinical trial data with third party researchers, whilst maintaining patient confidentiality. Clinical trial datasets contain very detailed information on each participant. Risk to patient privacy can be mitigated by data reduction techniques. However, retention of data utility is important in order to allow meaningful scientific research. In addition, for clinical trial data, an excessive application of such techniques may pose a public health risk if misleading results are produced. After considering existing guidance, this article makes recommendations with the aim of promoting an approach that balances data utility and privacy risk and is applicable across clinical trial data holders. DISCUSSION: Our key recommendations are as follows: 1. Data anonymisation/de-identification: Data holders are responsible for generating de-identified datasets which are intended to offer increased protection for patient privacy through masking or generalisation of direct and some indirect identifiers. 2. Controlled access to data, including use of a data sharing agreement: A legally binding data sharing agreement should be in place, including agreements not to download or further share data and not to attempt to seek to identify patients. Appropriate levels of security should be used for transferring data or providing access; one solution is use of a secure 'locked box' system which provides additional safeguards. This article provides recommendations on best practices to de-identify/anonymise clinical trial data for sharing with third-party researchers, as well as controlled access to data and data sharing agreements. The recommendations are applicable to all clinical trial data holders. Further work will be needed to identify and evaluate competing possibilities as regulations, attitudes to risk and technologies evolve.},
  urldate = {2021-11-03},
  journal = {BMC Medical Research Methodology},
  author = {Tucker, Katherine and Branson, Janice and Dilleen, Maria and Hollis, Sally and Loughlin, Paul and Nixon, Mark J and Williams, Zoë},
  month = jul,
  year = {2016},
  keywords = {legal, privacy},
  pages = {77},
}

% The publisher is stored in the publisher field rather than as a free-text
% note, so styles do not print it verbatim at the end of the reference.
@article{bovenberg_how_2020,
  title = {How to fix the {GDPR}'s frustration of global biomedical research},
  volume = {370},
  url = {https://www.science.org/doi/10.1126/science.abd2499},
  doi = {10.1126/science.abd2499},
  number = {6512},
  urldate = {2022-08-17},
  journal = {Science},
  author = {Bovenberg, Jasper and Peloquin, David and Bierer, Barbara and Barnes, Mark and Knoppers, Bartha Maria},
  month = oct,
  year = {2020},
  publisher = {American Association for the Advancement of Science},
  keywords = {GDPR, privacy},
  pages = {40--42},
}

@article{vayena_between_2016,
  title = {Between openness and privacy in genomics},
  volume = {13},
  url = {http://dx.doi.org/10.1371/journal.pmed.1001937},
  doi = {10.1371/journal.pmed.1001937},
  number = {1},
  urldate = {2021-10-18},
  journal = {PLoS Medicine},
  author = {Vayena, Effy and Gasser, Urs},
  month = jan,
  year = {2016},
  keywords = {Anxiety, Data management, Genome analysis, Genomics, Global health, Health care policy, Medical risk factors, Symbiosis, legal, privacy},
  pages = {e1001937},
}

@article{townend_conclusion_2018, title = {Conclusion: harmonisation in genomic and health data sharing for research: an impossible dream?}, volume = {137}, url = {http://dx.doi.org/10.1007/s00439-018-1924-x}, doi = {10.1007/s00439-018-1924-x}, abstract = {There are clear benefits from genomics and health data sharing in research and in therapy for individuals across societies. At the same time, citizens have different expectations and fears about that data sharing. 
International legislation in relation with research ethics and practice and, particularly, data protection create a particular environment that, as is seen in the articles in part two of this special issue, are crying out for harmonisation both at a procedural but at fundamental conceptual levels. The law of data sharing is pulling in different directions. This paper poses the question, 'harmonisation, an impossible dream?' and the answer is a qualified 'no'. The paper reflects on what can be seen in the papers in part two of the special issue. It then identifies three major areas of conceptual uncertainty in the new EU General Data Protection Regulation (not because it has superiority over other jurisdictions, but because it is a recent revision of data protection law that leaves universal conceptual questions unclear). Thereafter, the potential for Artificial Intelligence to meet some of the shortcomings is discussed. The paper ends with a consideration of the conditions under which data sharing harmonisation might be achieved: an understanding of a human rights approach and citizen sensitivities in considering the 'public interest'; social liberalism as a basis of solidarity; and the profession of 'researcher'.}, number = {8}, urldate = {2021-10-18}, journal = {Human Genetics}, author = {Townend, David}, month = aug, year = {2018}, keywords = {sharing, standards}, pages = {657--664}, }

% Titles are stored without a trailing period; the bibliography style
% supplies the terminal punctuation.
@article{tedersoo_data_2021,
  title = {Data sharing practices and data availability upon request differ across scientific disciplines},
  volume = {8},
  issn = {2052-4463},
  url = {http://www.nature.com/articles/s41597-021-00981-0},
  doi = {10.1038/s41597-021-00981-0},
  abstract = {Data sharing is one of the cornerstones of modern science that enables large-scale analyses and reproducibility. We evaluated data availability in research articles across nine disciplines in Nature and Science magazines and recorded corresponding authors' concerns, requests and reasons for declining data sharing. Although data sharing has improved in the last decade and particularly in recent years, data availability and willingness to share data still differ greatly among disciplines. We observed that statements of data availability upon (reasonable) request are inefficient and should not be allowed by journals. To improve data sharing at the time of manuscript acceptance, researchers should be better motivated to release their data with real benefits such as recognition, or bonus points in grant and job applications. We recommend that data management costs should be covered by funding agencies; publicly available research data ought to be included in the evaluation of applications; and surveillance of data sharing should be enforced by both academic publishers and funders. These cross-discipline survey data are available from the plutoF repository. © 2021. The Author(s).},
  number = {1},
  urldate = {2021-10-19},
  journal = {Scientific Data},
  author = {Tedersoo, Leho and Küngas, Rainer and Oras, Ester and Köster, Kajar and Eenmaa, Helen and Leijen, Äli and Pedaste, Margus and Raju, Marju and Astapova, Anastasiya and Lukner, Heli and Kogermann, Karin and Sepp, Tuul},
  month = jul,
  year = {2021},
  keywords = {culture, sharing},
  pages = {192},
}

% Web page with a corporate author: the name is double-braced so BibTeX
% treats it as one indivisible name instead of splitting it at the comma.
% The hosting site is recorded in howpublished, the field misc entries use.
@misc{rights_ocr_guidance_2012,
  title = {Guidance {Regarding} {Methods} for {De}-identification of {Protected} {Health} {Information} in {Accordance} with the {Health} {Insurance} {Portability} and {Accountability} {Act} ({HIPAA}) {Privacy} {Rule}},
  url = {https://www.hhs.gov/hipaa/for-professionals/privacy/special-topics/de-identification/index.html},
  abstract = {This page provides guidance about methods and approaches to achieve de-identification in accordance with the Health Insurance Portability and Accountability Act of 1996 (HIPAA) Privacy Rule. The guidance explains and answers questions regarding the two methods that can be used to satisfy the Privacy Rule’s de-identification standard: Expert Determination and Safe Harbor . This guidance is intended to assist covered entities to understand what is de-identification, the general process by which de-identified information is created, and the options available for performing de-identification.},
  urldate = {2021-10-18},
  howpublished = {HHS.gov},
  author = {{Office for Civil Rights (OCR)}},
  month = sep,
  year = {2012},
  keywords = {legal, privacy},
}

@article{oestreich_privacy_2021,
  title = {Privacy considerations for sharing genomics data},
  volume = {20},
  url = {https://www.excli.de/index.php/excli/article/view/4002},
  doi = {10.17179/excli2021-4002},
  abstract = {An increasing amount of attention has been geared towards understanding the privacy risks that arise from sharing genomic data of human origin. Most of these efforts have focused on issues in the context of genomic sequence data, but the popularity of techniques for collecting other types of genome-related data has prompted researchers to investigate privacy concerns in a broader genomic context. In this review, we give an overview of different types of genome-associated data, their individual ways of revealing sensitive information, the motivation to share them as well as established and upcoming methods to minimize information leakage. We further discuss the concise threats that are being posed, who is at risk, and how the risk level compares to potential benefits, all while addressing the topic in the context of modern technology, methodology, and information sharing culture. Additionally, we will discuss the current legal situation regarding the sharing of genomic data in a selection of countries, evaluating the scope of their applicability as well as their limitations. We will finalize this review by evaluating the development that is required in the scientific field in the near future in order to improve and develop privacy-preserving data sharing techniques for the genomic context. Copyright © 2021 Oestreich et al.},
  urldate = {2021-10-18},
  journal = {EXCLI Journal},
  author = {Oestreich, Marie and Chen, Dingfan and Schultze, Joachim L and Fritz, Mario and Becker, Matthias},
  month = jul,
  year = {2021},
  keywords = {GDPR, legal, privacy},
  pages = {1243--1260},
}

% Page range is written out in full (both ends carry the D prefix).
% NOTE(review): issue set to D1 on the assumption that the 2014 database
% issue is numbered like later Nucleic Acids Research ones; confirm.
@article{tryka_ncbis_2014,
  title = {{NCBI}'s {Database} of {Genotypes} and {Phenotypes}: {dbGaP}},
  volume = {42},
  url = {http://dx.doi.org/10.1093/nar/gkt1211},
  doi = {10.1093/nar/gkt1211},
  abstract = {The Database of Genotypes and Phenotypes (dbGap, http://www.ncbi.nlm.nih.gov/gap) is a National Institutes of Health-sponsored repository charged to archive, curate and distribute information produced by studies investigating the interaction of genotype and phenotype. Information in dbGaP is organized as a hierarchical structure and includes the accessioned objects, phenotypes (as variables and datasets), various molecular assay data (SNP and Expression Array data, Sequence and Epigenomic marks), analyses and documents. Publicly accessible metadata about submitted studies, summary level data, and documents related to studies can be accessed freely on the dbGaP website. Individual-level data are accessible via Controlled Access application to scientists across the globe.},
  number = {D1},
  urldate = {2022-01-04},
  journal = {Nucleic Acids Research},
  author = {Tryka, Kimberly A and Hao, Luning and Sturcke, Anne and Jin, Yumi and Wang, Zhen Y and Ziyabari, Lora and Lee, Moira and Popova, Natalia and Sharopova, Nataliya and Kimura, Masato and Feolo, Michael},
  year = {2014},
  pages = {D975--D979},
}

% The third author's surname is the two-word "Ni Loideain"; the comma form
% keeps both words in the family-name part.
@article{dove_raising_2021,
  title = {Raising standards for global data-sharing},
  volume = {371},
  url = {https://www.science.org/doi/10.1126/science.abf4286},
  doi = {10.1126/science.abf4286},
  number = {6525},
  urldate = {2022-08-17},
  journal = {Science},
  author = {Dove, Edward S. and Chen, Jiahong and Ni Loideain, Nóra},
  month = jan,
  year = {2021},
  publisher = {American Association for the Advancement of Science},
  keywords = {GDPR, privacy, sharing},
  pages = {133--134},
}

@article{sayers_genbank_2022, title = {{GenBank}}, volume = {50}, issn = {0305-1048}, url = {https://doi.org/10.1093/nar/gkab1135}, doi = {10.1093/nar/gkab1135}, abstract = {GenBank® (https://www.ncbi.nlm.nih.gov/genbank/) is a comprehensive, public database that contains 15.3 trillion base pairs from over 2.5 billion nucleotide sequences for 504 000 formally described species. Recent updates include resources for data from the SARS-CoV-2 virus, including a SARS-CoV-2 landing page, NCBI Datasets, NCBI Virus and the Submission Portal. 
We also discuss upcoming changes to GI identifiers, a new data management interface for BioProject, and advice for providing contextual metadata in submissions.}, number = {D1}, urldate = {2022-08-16}, journal = {Nucleic Acids Research}, author = {Sayers, Eric W and Cavanaugh, Mark and Clark, Karen and Pruitt, Kim D and Schoch, Conrad L and Sherry, Stephen T and Karsch-Mizrachi, Ilene}, month = jan, year = {2022}, keywords = {NCBI, archive, repository}, pages = {D161--D164}, }

% Science items below: the publisher is stored in the publisher field rather
% than as a free-text note, and single-page items list one page, not a range.
@article{vaske_data_2019,
  title = {Data sharing for pediatric cancers},
  volume = {363},
  url = {https://www.science.org/doi/10.1126/science.aax2739},
  doi = {10.1126/science.aax2739},
  number = {6432},
  urldate = {2022-08-17},
  journal = {Science},
  author = {Vaske, Olena Morozova and Haussler, David},
  month = mar,
  year = {2019},
  publisher = {American Association for the Advancement of Science},
  keywords = {legal, privacy, sharing},
  pages = {1125},
}

@article{amann_toward_2019,
  title = {Toward unrestricted use of public genomic data},
  volume = {363},
  url = {https://www.science.org/doi/10.1126/science.aaw1280},
  doi = {10.1126/science.aaw1280},
  number = {6425},
  urldate = {2022-08-17},
  journal = {Science},
  author = {Amann, Rudolf I. and Baichoo, Shakuntala and Blencowe, Benjamin J. and Bork, Peer and Borodovsky, Mark and Brooksbank, Cath and Chain, Patrick S. G. and Colwell, Rita R. and Daffonchio, Daniele G. and Danchin, Antoine and de Lorenzo, Victor and Dorrestein, Pieter C. and Finn, Robert D. and Fraser, Claire M. and Gilbert, Jack A. and Hallam, Steven J. and Hugenholtz, Philip and Ioannidis, John P. A. and Jansson, Janet K. and Kim, Jihyun F. and Klenk, Hans-Peter and Klotz, Martin G. and Knight, Rob and Konstantinidis, Konstantinos T. and Kyrpides, Nikos C. and Mason, Christopher E. and McHardy, Alice C. and Meyer, Folker and Ouzounis, Christos A. and Patrinos, Aristides A. N. and Podar, Mircea and Pollard, Katherine S. and Ravel, Jacques and Muñoz, Alejandro Reyes and Roberts, Richard J. and Rosselló-Móra, Ramon and Sansone, Susanna-Assunta and Schloss, Patrick D. and Schriml, Lynn M. and Setubal, João C. and Sorek, Rotem and Stevens, Rick L. and Tiedje, James M. and Turjanski, Adrian and Tyson, Gene W. and Ussery, David W. and Weinstock, George M. and White, Owen and Whitman, William B. and Xenarios, Ioannis},
  month = jan,
  year = {2019},
  publisher = {American Association for the Advancement of Science},
  keywords = {GDPR, legal, privacy, sharing},
  pages = {350--352},
}

@article{amann_consent_2019,
  title = {Consent insufficient for data release—{Response}},
  volume = {364},
  url = {https://www.science.org/doi/10.1126/science.aax7509},
  doi = {10.1126/science.aax7509},
  number = {6439},
  urldate = {2022-08-17},
  journal = {Science},
  author = {Amann, Rudolf I. and Baichoo, Shakuntala and Blencowe, Benjamin J. and Bork, Peer and Borodovsky, Mark and Brooksbank, Cath and Chain, Patrick S. G. and Colwell, Rita R. and Daffonchio, Daniele G. and Danchin, Antoine and de Lorenzo, Victor and Dorrestein, Pieter C. and Finn, Robert D. and Fraser, Claire M. and Gilbert, Jack A. and Hallam, Steven J. and Hugenholtz, Philip and Ioannidis, John P. A. and Jansson, Janet K. and Kim, Jihyun F. and Klenk, Hans-Peter and Klotz, Martin G. and Knight, Rob and Konstantinidis, Konstantinos T. and Kyrpides, Nikos C. and Mason, Christopher E. and McHardy, Alice C. and Meyer, Folker and Ouzounis, Christos A. and Patrinos, Aristides A. N. and Podar, Mircea and Pollard, Katherine S. and Ravel, Jacques and Muñoz, Alejandro Reyes and Roberts, Richard J. and Rosselló-Móra, Ramon and Sansone, Susanna-Assunta and Schloss, Patrick D. and Schriml, Lynn M. and Setubal, João C. and Sorek, Rotem and Stevens, Rick L. and Tiedje, James M. and Turjanski, Adrian and Tyson, Gene W. and Ussery, David W. and Weinstock, George M. and White, Owen and Whitman, William B. and Xenarios, Ioannis},
  month = may,
  year = {2019},
  publisher = {American Association for the Advancement of Science},
  keywords = {legal, privacy, sharing},
  pages = {446},
}

@article{nicol_consent_2019,
  title = {Consent insufficient for data release},
  volume = {364},
  url = {https://www.science.org/doi/10.1126/science.aax0892},
  doi = {10.1126/science.aax0892},
  number = {6439},
  urldate = {2022-08-17},
  journal = {Science},
  author = {Nicol, Dianne and Eckstein, Lisa and Bentzen, Heidi Beate and Borry, Pascal and Burgess, Mike and Burke, Wylie and Chalmers, Don and Cho, Mildred and Dove, Edward and Fullerton, Stephanie and Ida, Ryuchi and Kato, Kazuto and Kaye, Jane and Koenig, Barbara and Manson, Spero and McGrail, Kimberlyn and Meslin, Eric and O'Doherty, Kieran and Prainsack, Barbara and Shabani, Mahsa and Tabor, Holly and Thorogood, Adrian and de Vries, Jantina},
  month = may,
  year = {2019},
  publisher = {American Association for the Advancement of Science},
  keywords = {GDPR, consent, legal, privacy},
  pages = {445--446},
}

@article{sim_time_2020,
  title = {Time for {NIH} to lead on data sharing},
  volume = {367},
  url = {https://www.science.org/doi/10.1126/science.aba4456},
  doi = {10.1126/science.aba4456},
  number = {6484},
  urldate = {2022-08-17},
  journal = {Science},
  author = {Sim, Ida and Stebbins, Michael and Bierer, Barbara E. and Butte, Atul J. and Drazen, Jeffrey and Dzau, Victor and Hernandez, Adrian F. and Krumholz, Harlan M. and Lo, Bernard and Munos, Bernard and Perakslis, Eric and Rockhold, Frank and Ross, Joseph S. and Terry, Sharon F. and Yamamoto, Keith R. and Zarin, Deborah A. and Li, Rebecca},
  month = mar,
  year = {2020},
  publisher = {American Association for the Advancement of Science},
  keywords = {sharing},
  pages = {1308--1309},
}

% Corporate author: double-braced so it is treated as one indivisible name,
% and stored in normal capitalisation so the style controls the casing.
@article{the_global_alliance_for_genomics_and_health_federated_2016,
  title = {A federated ecosystem for sharing genomic, clinical data},
  volume = {352},
  url = {https://www.science.org/doi/10.1126/science.aaf6162},
  doi = {10.1126/science.aaf6162},
  number = {6291},
  urldate = {2022-08-17},
  journal = {Science},
  author = {{The Global Alliance for Genomics and Health}},
  month = jun,
  year = {2016},
  publisher = {American Association for the Advancement of Science},
  keywords = {GA4GH, privacy, sharing, standards},
  pages = {1278--1280},
}

@article{thorogood_canada_2018, title = {Canada: will privacy rules continue to favour open science?}, volume = {137}, url = {http://dx.doi.org/10.1007/s00439-018-1905-0}, doi = {10.1007/s00439-018-1905-0}, abstract = {Canada's regulatory frameworks governing privacy and research are generally permissive of genomic data sharing, though they may soon be tightened in response to public concerns over commercial data handling practices and the strengthening of influential European privacy laws. Regulation can seem complex and uncertain, in part because of the constitutional division of power between federal and provincial governments over both privacy and health care. Broad consent is commonly practiced in genomic research, but without explicit regulatory recognition, it is often scrutinized by research or privacy oversight bodies. Secondary use of health-care data is legally permissible under limited circumstances. A new federal law prohibits genetic discrimination, but is subject to a constitutional challenge. Privacy laws require security safeguards proportionate to the data sensitivity, including breach notification. Special categories of data are not defined a priori. 
With some exceptions, Canadian researchers are permitted to share personal information internationally but are held accountable for safeguarding the privacy and security of these data. Cloud computing to store and share large scale data sets is permitted, if shared responsibilities for access, responsible use, and security are carefully articulated. For the moment, Canada's commercial sector is recognized as "adequate" by Europe, facilitating import of European data. Maintaining adequacy status under the new European General Data Protection Regulation (GDPR) is a concern because of Canada's weaker individual rights, privacy protections, and regulatory enforcement. Researchers must stay attuned to shifting international and national regulations to ensure a sustainable future for responsible genomic data sharing.}, number = {8}, urldate = {2021-10-18}, journal = {Human Genetics}, author = {Thorogood, Adrian}, month = aug, year = {2018}, keywords = {GDPR, privacy, sharing}, pages = {595--602}, }

% The title is stored without a trailing period; the bibliography style
% supplies the terminal punctuation.
@article{phillips_international_2018,
  title = {International data-sharing norms: from the {OECD} to the {General} {Data} {Protection} {Regulation} ({GDPR})},
  volume = {137},
  url = {http://dx.doi.org/10.1007/s00439-018-1919-7},
  doi = {10.1007/s00439-018-1919-7},
  abstract = {The evolution of genomic research and its integration into clinical practice, as they become international-even global-endeavors, has brought us to a place where scientists and clinicians may now only ignore the rules governing international data sharing at their own peril. Open data policies, on the one hand, increasingly require custodians of others' genomic data to make it as widely available as feasible, including to researchers in other countries. Data protection law, on the other, has become a significant hurdle to the sharing of personal data across jurisdictional borders. The space between these two competing duties is narrowing. In contrast with the other texts in this volume, which explore the present and future of data sharing and data protection, this article's focus is on the past. It centres on the historical development of the data protection rules regarding the international transfer of personal data up to the present. The article's aim is to bring into focus the underlying objectives that have influenced and that will continue to influence the way that data protection rules are applied to the fields of genomics and health, as well as future developments in data protection generally. The first part of this article describes the development of international data-sharing data protection rules since 1970. The second considers difficulties in applying general data protection rules to the specific context of genomics and health. The third and final part compares the options available to comply with the international transfer restrictions set out in the standard-setting EU General Data Protection Regulation from a genomics perspective.},
  number = {8},
  urldate = {2021-10-18},
  journal = {Human Genetics},
  author = {Phillips, Mark},
  month = aug,
  year = {2018},
  keywords = {GDPR, privacy, sharing},
  pages = {575--582},
}

@article{molnar-gabor_germany_2018, title = {Germany: a fair balance between scientific freedom and data subjects' rights?}, volume = {137}, url = {http://dx.doi.org/10.1007/s00439-018-1912-1}, doi = {10.1007/s00439-018-1912-1}, abstract = {With the German Bundestag's adoption of the Data Protection Adaptation and Implementation Act EU (DSAnpUG-EU) on 30 June 2017, the adaptation of German law to the General Data Protection Regulation (GDPR) has begun (Gesetz zur Anpassung des Datenschutzrechts an die Verordnung (EU) 2016/679 und zur Umsetzung der Richtlinie (EU) 2016/680 (Datenschutz-Anpassungs- und -Umsetzungsgesetz-DSAnpUG-EU) v. 30. Juni 2017, BGBl. 2017 I p. 2097 et seq.). 
Despite being directly binding on all EU member states, the GDPR does not render national data protection provision obsolete-they are covered by the GDPR's opening clauses which include regulatory mandates and room for derogation. This creates considerable need for national legislative adaptation. Art. 1 DSAnpUG-EU contains the necessary amendments to the Federal Data Protection Law (BDSG(neu)), thus creating the second major building block of future German data protection alongside the GDPR itself. Nevertheless, there are still numerous sector-specific regulations in other federal laws and the data protection laws of the 16 states also need amendments. Adjustment in Germany is well on its way, but implementation in general is still ongoing, with further consequences for data processing and sharing.}, number = {8}, urldate = {2021-10-18}, journal = {Human Genetics}, author = {Molnár-Gábor, Fruzsina}, month = aug, year = {2018}, keywords = {GDPR}, pages = {619--626}, } @article{kalkman_responsible_2019, title = {Responsible data sharing in international health research: a systematic review of principles and norms.}, volume = {20}, url = {http://dx.doi.org/10.1186/s12910-019-0359-9}, doi = {10.1186/s12910-019-0359-9}, abstract = {BACKGROUND: Large-scale linkage of international clinical datasets could lead to unique insights into disease aetiology and facilitate treatment evaluation and drug development. Hereto, multi-stakeholder consortia are currently designing several disease-specific translational research platforms to enable international health data sharing. Despite the recent adoption of the EU General Data Protection Regulation (GDPR), the procedures for how to govern responsible data sharing in such projects are not at all spelled out yet. In search of a first, basic outline of an ethical governance framework, we set out to explore relevant ethical principles and norms. 
METHODS: We performed a systematic review of literature and ethical guidelines for principles and norms pertaining to data sharing for international health research. RESULTS: We observed an abundance of principles and norms with considerable convergence at the aggregate level of four overarching themes: societal benefits and value; distribution of risks, benefits and burdens; respect for individuals and groups; and public trust and engagement. However, at the level of principles and norms we identified substantial variation in the phrasing and level of detail, the number and content of norms considered necessary to protect a principle, and the contextual approaches in which principles and norms are used. CONCLUSIONS: While providing some helpful leads for further work on a coherent governance framework for data sharing, the current collection of principles and norms prompts important questions about how to streamline terminology regarding de-identification and how to harmonise the identified principles and norms into a coherent governance framework that promotes data sharing while securing public trust.}, number = {1}, urldate = {2021-10-18}, journal = {BMC medical ethics}, author = {Kalkman, Shona and Mostert, Menno and Gerlinger, Christoph and van Delden, Johannes J M and van Thiel, Ghislaine J M W}, month = mar, year = {2019}, keywords = {Big data, Data sharing, Ethical governance, GDPR, Research ethics, Secondary use}, pages = {21}, } @article{brunak_nucleotide_2002, title = {Nucleotide {Sequence} {Database} {Policies}}, volume = {298}, url = {https://www.science.org/doi/10.1126/science.298.5597.1333b}, doi = {10.1126/science.298.5597.1333b}, number = {5597}, urldate = {2022-08-17}, journal = {Science}, author = {Brunak, Soren and Danchin, Antoine and Hattori, Masahira and Nakamura, Haruki and Shinozaki, Kazuo and Matise, Tara and Preuss, Daphne}, month = nov, year = {2002}, note = {Publisher: American Association for the Advancement of Science}, pages = 
{1333},
}

@article{deutsch_proteomexchange_2020,
  title = {The {ProteomeXchange} consortium in 2020: enabling ‘big data’ approaches in proteomics},
  volume = {48},
  issn = {0305-1048},
  shorttitle = {The {ProteomeXchange} consortium in 2020},
  url = {https://doi.org/10.1093/nar/gkz984},
  doi = {10.1093/nar/gkz984},
  abstract = {The ProteomeXchange (PX) consortium of proteomics resources (http://www.proteomexchange.org) has standardized data submission and dissemination of mass spectrometry proteomics data worldwide since 2012. In this paper, we describe the main developments since the previous update manuscript was published in Nucleic Acids Research in 2017. Since then, in addition to the four PX existing members at the time (PRIDE, PeptideAtlas including the PASSEL resource, MassIVE and jPOST), two new resources have joined PX: iProX (China) and Panorama Public (USA). We first describe the updated submission guidelines, now expanded to include six members. Next, with current data submission statistics, we demonstrate that the proteomics field is now actively embracing public open data policies. At the end of June 2019, more than 14 100 datasets had been submitted to PX resources since 2012, and from those, more than 9 500 in just the last three years. In parallel, an unprecedented increase of data re-use activities in the field, including ‘big data’ approaches, is enabling novel research and new data resources. 
At last, we also outline some of our future plans for the coming years.},
  number = {D1},
  urldate = {2022-08-17},
  journal = {Nucleic Acids Research},
  author = {Deutsch, Eric W and Bandeira, Nuno and Sharma, Vagisha and Perez-Riverol, Yasset and Carver, Jeremy J and Kundu, Deepti J and García-Seisdedos, David and Jarnuczak, Andrew F and Hewapathirana, Suresh and Pullman, Benjamin S and Wertz, Julie and Sun, Zhi and Kawano, Shin and Okuda, Shujiro and Watanabe, Yu and Hermjakob, Henning and MacLean, Brendan and MacCoss, Michael J and Zhu, Yunping and Ishihama, Yasushi and Vizcaíno, Juan A},
  month = jan,
  year = {2020},
  pages = {D1145--D1152},
}

@article{streeter_human-induced_2017,
  title = {The human-induced pluripotent stem cell initiative—data resources for cellular genetics},
  volume = {45},
  issn = {0305-1048},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5210631/},
  doi = {10.1093/nar/gkw928},
  abstract = {The Human Induced Pluripotent Stem Cell Initiative (HipSci) is establishing a large catalogue of human iPSC lines, arguably the most well characterized collection to date. The HipSci portal enables researchers to choose the right cell line for their experiment, and makes HipSci's rich catalogue of assay data easy to discover and reuse. Each cell line has genomic, transcriptomic, proteomic and cellular phenotyping data. Data are deposited in the appropriate EMBL-EBI archives, including the European Nucleotide Archive (ENA), European Genome-phenome Archive (EGA), ArrayExpress and PRoteomics IDEntifications (PRIDE) databases. The project will make 500 cell lines from healthy individuals, and from 150 patients with rare genetic diseases; these will be available through the European Collection of Authenticated Cell Cultures (ECACC). As of August 2016, 238 cell lines are available for purchase. 
Project data is presented through the HipSci data portal (http://www.hipsci.org/lines) and is downloadable from the associated FTP site (ftp://ftp.hipsci.ebi.ac.uk/vol1/ftp). The data portal presents a summary matrix of the HipSci cell lines, showing available data types. Each line has its own page containing descriptive metadata, quality information, and links to archived assay data. Analysis results are also available in a Track Hub, allowing visualization in the context of public genomic annotations (http://www.hipsci.org/data/trackhubs).},
  number = {D1},
  urldate = {2022-08-17},
  journal = {Nucleic Acids Research},
  author = {Streeter, Ian and Harrison, Peter W. and Faulconbridge, Adam and Flicek, Paul and Parkinson, Helen and Clarke, Laura},
  month = jan,
  year = {2017},
  pmid = {27733501},
  pmcid = {PMC5210631},
  pages = {D691--D697},
}

@article{hartley_bioimage_2022,
  series = {Computation {Resources} for {Molecular} {Biology}},
  title = {The {BioImage} {Archive} – {Building} a {Home} for {Life}-{Sciences} {Microscopy} {Data}},
  volume = {434},
  issn = {0022-2836},
  url = {https://www.sciencedirect.com/science/article/pii/S0022283622000791},
  doi = {10.1016/j.jmb.2022.167505},
  abstract = {Despite the huge impact of data resources in genomics and structural biology, until now there has been no central archive for biological data for all imaging modalities. The BioImage Archive is a new data resource at the European Bioinformatics Institute (EMBL-EBI) designed to fill this gap. In its initial development BioImage Archive accepts bioimaging data associated with publications, in any format, from any imaging modality from the molecular to the organism scale, excluding medical imaging. The BioImage Archive will ensure reproducibility of published studies that derive results from image data and reduce duplication of effort. 
Most importantly, the BioImage Archive will help scientists to generate new insights through reuse of existing data to answer new biological questions, and provision of training, testing and benchmarking data for development of tools for image analysis. The archive is available at https://www.ebi.ac.uk/bioimage-archive/.},
  language = {en},
  number = {11},
  urldate = {2022-08-16},
  journal = {Journal of Molecular Biology},
  author = {Hartley, Matthew and Kleywegt, Gerard J. and Patwardhan, Ardan and Sarkans, Ugis and Swedlow, Jason R. and Brazma, Alvis},
  month = jun,
  year = {2022},
  keywords = {FAIR, database, imaging, microscopy, open},
  pages = {167505},
}

@article{haug_metabolights_2020,
  title = {{MetaboLights}: a resource evolving in response to the needs of its scientific community},
  volume = {48},
  issn = {0305-1048},
  shorttitle = {{MetaboLights}},
  url = {https://doi.org/10.1093/nar/gkz1019},
  doi = {10.1093/nar/gkz1019},
  abstract = {MetaboLights is a database for metabolomics studies, their raw experimental data and associated metadata. The database is cross-species and cross-technique and it covers metabolite structures and their reference spectra as well as their biological roles and locations. MetaboLights is the recommended metabolomics repository for a number of leading journals and ELIXIR, the European infrastructure for life science information. In this article, we describe the significant updates that we have made over the last two years to the resource to respond to the increasing amount and diversity of data being submitted by the metabolomics community. We refreshed the website and most importantly, our submission process was completely overhauled to enable us to deliver a far more user-friendly submission process and to facilitate the growing demand for reproducibility and integration with other ‘omics. 
Metabolomics resources and data are available under the EMBL-EBI’s Terms of Use via the web at https://www.ebi.ac.uk/metabolights and under Apache 2.0 at Github (https://github.com/EBI-Metabolights/).},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Haug, Kenneth and Cochrane, Keeva and Nainala, Venkata Chandrasekhar and Williams, Mark and Chang, Jiakang and Jayaseelan, Kalai Vanii and O’Donovan, Claire},
  month = jan,
  year = {2020},
  pages = {D440--D444},
}

@article{perez-riverol_pride_2022,
  title = {The {PRIDE} database resources in 2022: a hub for mass spectrometry-based proteomics evidences},
  volume = {50},
  issn = {0305-1048},
  shorttitle = {The {PRIDE} database resources in 2022},
  url = {https://doi.org/10.1093/nar/gkab1038},
  doi = {10.1093/nar/gkab1038},
  abstract = {The PRoteomics IDEntifications (PRIDE) database (https://www.ebi.ac.uk/pride/) is the world's largest data repository of mass spectrometry-based proteomics data. PRIDE is one of the founding members of the global ProteomeXchange (PX) consortium and an ELIXIR core data resource. In this manuscript, we summarize the developments in PRIDE resources and related tools since the previous update manuscript was published in Nucleic Acids Research in 2019. The number of submitted datasets to PRIDE Archive (the archival component of PRIDE) has reached on average around 500 datasets per month during 2021. In addition to continuous improvements in PRIDE Archive data pipelines and infrastructure, the PRIDE Spectra Archive has been developed to provide direct access to the submitted mass spectra using Universal Spectrum Identifiers. As a key point, the file format MAGE-TAB for proteomics has been developed to enable the improvement of sample metadata annotation. Additionally, the resource PRIDE Peptidome provides access to aggregated peptide/protein evidences across PRIDE Archive. 
Furthermore, we will describe how PRIDE has increased its efforts to reuse and disseminate high-quality proteomics data into other added-value resources such as UniProt, Ensembl and Expression Atlas.},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Perez-Riverol, Yasset and Bai, Jingwen and Bandla, Chakradhar and García-Seisdedos, David and Hewapathirana, Suresh and Kamatchinathan, Selvakumar and Kundu, Deepti J and Prakash, Ananth and Frericks-Zipper, Anika and Eisenacher, Martin and Walzer, Mathias and Wang, Shengbo and Brazma, Alvis and Vizcaíno, Juan Antonio},
  month = jan,
  year = {2022},
  pages = {D543--D552},
}

@article{sarkans_biostudies_2018,
  title = {The {BioStudies} database—one stop shop for all data supporting a life sciences study},
  volume = {46},
  issn = {0305-1048},
  url = {https://doi.org/10.1093/nar/gkx965},
  doi = {10.1093/nar/gkx965},
  abstract = {BioStudies (www.ebi.ac.uk/biostudies) is a new public database that organizes data from biological studies. Typically, but not exclusively, a study is associated with a publication. BioStudies offers a simple way to describe the study structure, and provides flexible data deposition tools and data access interfaces. The actual data can be stored either in BioStudies or remotely, or both. BioStudies imports supplementary data from Europe PMC, and is a resource for authors and publishers for packaging data during the manuscript preparation process. It also can support data management needs of collaborative projects. The growth in multiomics experiments and other multi-faceted approaches to life sciences research mean that studies result in a diversity of data outputs in multiple locations. 
BioStudies presents a solution to ensuring that all these data and the associated publication(s) can be found coherently in the longer term.},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Sarkans, Ugis and Gostev, Mikhail and Athar, Awais and Behrangi, Ehsan and Melnichuk, Olga and Ali, Ahmed and Minguet, Jasmine and Rada, Juan Camillo and Snow, Catherine and Tikhonov, Andrew and Brazma, Alvis and McEntyre, Johanna},
  month = jan,
  year = {2018},
  pages = {D1266--D1270},
}

@article{courtot_biosamples_2019,
  title = {{BioSamples} database: an updated sample metadata hub},
  volume = {47},
  issn = {0305-1048},
  shorttitle = {{BioSamples} database},
  url = {https://doi.org/10.1093/nar/gky1061},
  doi = {10.1093/nar/gky1061},
  abstract = {The BioSamples database at EMBL-EBI provides a central hub for sample metadata storage and linkage to other EMBL-EBI resources. BioSamples has recently undergone major changes, both in terms of data content and supporting infrastructure. The data content has more than doubled from around 2 million samples in 2014 to just over 5 million samples in 2018. Fast, reciprocal data exchange was fully established between sister Biosample databases and other INSDC partners, enabling a worldwide common representation and centralization of sample metadata. The BioSamples platform has been upgraded to accommodate anticipated increases in the number of submissions via GA4GH driver projects such as the Human Cell Atlas and the EGA, as well as from mirroring of NCBI dbGaP data. The BioSamples database is now the authoritative repository for all INSDC sample metadata, an ELIXIR Deposition Database for Biomolecular Data and the EMBL-EBI sample metadata hub. To support faster turnaround for sample submission, and to increase scalability and resilience, we have upgraded the BioSamples database backend storage, APIs and user interface. 
Finally, the website has been redesigned to allow search and retrieval of records based on specific filters, such as ‘disease’ or ‘organism’. These changes are targeted at answering current use cases as well as providing functionalities for future emerging and anticipated developments. Availability: The BioSamples database is freely available at http://www.ebi.ac.uk/biosamples. Content is distributed under the EMBL-EBI Terms of Use available at https://www.ebi.ac.uk/about/terms-of-use.},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Courtot, Mélanie and Cherubin, Luca and Faulconbridge, Adam and Vaughan, Daniel and Green, Matthew and Richardson, David and Harrison, Peter and Whetzel, Patricia L and Parkinson, Helen and Burdett, Tony},
  month = jan,
  year = {2019},
  pages = {D1172--D1178},
}

@article{clough_gene_2016,
  title = {The {Gene} {Expression} {Omnibus} database},
  volume = {1418},
  issn = {1064-3745},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4944384/},
  doi = {10.1007/978-1-4939-3578-9_5},
  abstract = {The Gene Expression Omnibus (GEO) database is an international public repository that archives and freely distributes high-throughput gene expression and other functional genomics data sets. Created in 2000 as a worldwide resource for gene expression studies, GEO has evolved with rapidly changing technologies and now accepts high-throughput data for many other data applications, including those that examine genome methylation, chromatin structure, and genome–protein interactions. GEO supports community-derived reporting standards that specify provision of several critical study elements including raw data, processed data, and descriptive metadata. The database not only provides access to data for tens of thousands of studies, but also offers various Web-based tools and strategies that enable users to locate data relevant to their specific interests, as well as to visualize and analyze the data. 
This chapter includes detailed descriptions of methods to query and download GEO data and use the analysis and visualization tools. The GEO homepage is at http://www.ncbi.nlm.nih.gov/geo/.},
  urldate = {2022-08-16},
  journal = {Methods in Molecular Biology},
  author = {Clough, Emily and Barrett, Tanya},
  year = {2016},
  pmid = {27008011},
  pmcid = {PMC4944384},
  pages = {93--110},
}

@article{parkinson_arrayexpresspublic_2007,
  title = {{ArrayExpress}—a public database of microarray experiments and gene expression profiles},
  volume = {35},
  issn = {0305-1048},
  url = {https://doi.org/10.1093/nar/gkl995},
  doi = {10.1093/nar/gkl995},
  abstract = {ArrayExpress is a public database for high throughput functional genomics data. ArrayExpress consists of two parts—the ArrayExpress Repository, which is a MIAME supportive public archive of microarray data, and the ArrayExpress Data Warehouse, which is a database of gene expression profiles selected from the repository and consistently re-annotated. Archived experiments can be queried by experiment attributes, such as keywords, species, array platform, authors, journals or accession numbers. Gene expression profiles can be queried by gene names and properties, such as Gene Ontology terms and gene expression profiles can be visualized. ArrayExpress is a rapidly growing database, currently it contains data from {\textgreater}50 000 hybridizations and {\textgreater}1 500 000 individual expression profiles. ArrayExpress supports community standards, including MIAME, MAGE-ML and more recently the proposal for a spreadsheet based data exchange format: MAGE-TAB. Availability: .},
  number = {suppl\_1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Parkinson, H. and Kapushesky, M. and Shojatalab, M. and Abeygunawardena, N. and Coulson, R. and Farne, A. and Holloway, E. and Kolesnykov, N. and Lilja, P. and Lukk, M. and Mani, R. and Rayner, T. and Sharma, A. and William, E. and Sarkans, U. 
and Brazma, A.},
  month = jan,
  year = {2007},
  pages = {D747--D750},
}

@article{cncb-ngdc_members_and_partners_database_2022,
  title = {Database {Resources} of the {National} {Genomics} {Data} {Center}, {China} {National} {Center} for {Bioinformation} in 2022},
  volume = {50},
  issn = {0305-1048},
  url = {https://doi.org/10.1093/nar/gkab951},
  doi = {10.1093/nar/gkab951},
  abstract = {The National Genomics Data Center (NGDC), part of the China National Center for Bioinformation (CNCB), provides a family of database resources to support global research in both academia and industry. With the explosively accumulated multi-omics data at ever-faster rates, CNCB-NGDC is constantly scaling up and updating its core database resources through big data archive, curation, integration and analysis. In the past year, efforts have been made to synthesize the growing data and knowledge, particularly in single-cell omics and precision medicine research, and a series of resources have been newly developed, updated and enhanced. Moreover, CNCB-NGDC has continued to daily update SARS-CoV-2 genome sequences, variants, haplotypes and literature. Particularly, OpenLB, an open library of bioscience, has been established by providing easy and open access to a substantial number of abstract texts from PubMed, bioRxiv and medRxiv. In addition, Database Commons is significantly updated by cataloguing a full list of global databases, and BLAST tools are newly deployed to provide online sequence search services. 
All these resources along with their services are publicly accessible at https://ngdc.cncb.ac.cn.},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {{CNCB-NGDC Members and Partners}},
  month = jan,
  year = {2022},
  pages = {D27--D38},
}

@article{okido_dna_2022,
  title = {{DNA} {Data} {Bank} of {Japan} ({DDBJ}) update report 2021},
  volume = {50},
  issn = {0305-1048},
  url = {https://doi.org/10.1093/nar/gkab995},
  doi = {10.1093/nar/gkab995},
  abstract = {The Bioinformation and DDBJ (DNA Data Bank of Japan) Center (DDBJ Center; https://www.ddbj.nig.ac.jp) operates archival databases that collect nucleotide sequences, study and sample information, and distribute them without access restriction to progress life science research as a member of the International Nucleotide Sequence Database Collaboration (INSDC), in collaboration with the National Center for Biotechnology Information (NCBI) and the European Bioinformatics Institute. Besides the INSDC databases, the DDBJ Center also provides the Genomic Expression Archive for functional genomics data and the Japanese Genotype-phenotype Archive for human data requiring controlled access. Additionally, the DDBJ Center started a new public repository, MetaboBank, for experimental raw data and metadata from metabolomics research in October 2020. In response to the COVID-19 pandemic, the DDBJ Center openly shares SARS-CoV-2 genome sequences in collaboration with Shizuoka Prefecture and Keio University. The operation of DDBJ is based on the National Institute of Genetics (NIG) supercomputer, which is open for large-scale sequence data analysis for life science researchers. 
This paper reports recent updates on the archival databases and the services of DDBJ.},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Okido, Toshihisa and Kodama, Yuichi and Mashima, Jun and Kosuge, Takehide and Fujisawa, Takatomo and Ogasawara, Osamu},
  month = jan,
  year = {2022},
  pages = {D102--D105},
}

@article{katz_sequence_2022,
  title = {The {Sequence} {Read} {Archive}: a decade more of explosive growth},
  volume = {50},
  issn = {0305-1048},
  shorttitle = {The {Sequence} {Read} {Archive}},
  url = {https://doi.org/10.1093/nar/gkab1053},
  doi = {10.1093/nar/gkab1053},
  abstract = {The Sequence Read Archive (SRA, https://www.ncbi.nlm.nih.gov/sra/) stores raw sequencing data and alignment information to enhance reproducibility and facilitate new discoveries through data analysis. Here we note changes in storage designed to increase access and highlight analyses that augment metadata with taxonomic insight to help users select data. In addition, we present three unanticipated applications of taxonomic analysis.},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Katz, Kenneth and Shutov, Oleg and Lapoint, Richard and Kimelman, Michael and Brister, J Rodney and O’Sullivan, Christopher},
  month = jan,
  year = {2022},
  pages = {D387--D390},
}

@article{cezard_european_2022,
  title = {The {European} {Variation} {Archive}: a {FAIR} resource of genomic variation for all species},
  volume = {50},
  issn = {0305-1048},
  shorttitle = {The {European} {Variation} {Archive}},
  url = {https://doi.org/10.1093/nar/gkab960},
  doi = {10.1093/nar/gkab960},
  abstract = {The European Variation Archive (EVA; https://www.ebi.ac.uk/eva/) is a resource for sharing all types of genetic variation data (SNPs, indels, and structural variants) for all species. The EVA was created in 2014 to provide FAIR access to genetic variation data and has since grown to be a primary resource for genomic variants hosting {\textgreater}3 billion records. 
The EVA and dbSNP have established a compatible global system to assign unique identifiers to all submitted genetic variants. The EVA is active within the Global Alliance of Genomics and Health (GA4GH), maintaining, contributing and implementing standards such as VCF, Refget and Variant Representation Specification (VRS). In this article, we describe the submission and permanent accessioning services along with the different ways the data can be retrieved by the scientific community.},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Cezard, Timothe and Cunningham, Fiona and Hunt, Sarah E and Koylass, Baron and Kumar, Nitin and Saunders, Gary and Shen, April and Silva, Andres F and Tsukanov, Kirill and Venkataraman, Sundararaman and Flicek, Paul and Parkinson, Helen and Keane, Thomas M},
  month = jan,
  year = {2022},
  pages = {D1216--D1220},
}

@article{moreno_expression_2022,
  title = {Expression {Atlas} update: gene and protein expression in multiple species},
  volume = {50},
  issn = {0305-1048},
  shorttitle = {Expression {Atlas} update},
  url = {https://doi.org/10.1093/nar/gkab1030},
  doi = {10.1093/nar/gkab1030},
  abstract = {The EMBL-EBI Expression Atlas is an added value knowledge base that enables researchers to answer the question of where (tissue, organism part, developmental stage, cell type) and under which conditions (disease, treatment, gender, etc) a gene or protein of interest is expressed. Expression Atlas brings together data from {\textgreater}4500 expression studies from {\textgreater}65 different species, across different conditions and tissues. It makes these data freely available in an easy to visualise form, after expert curation to accurately represent the intended experimental design, re-analysed via standardised pipelines that rely on open-source community developed tools. Each study's metadata are annotated using ontologies. The data are re-analyzed with the aim of reproducing the original conclusions of the underlying experiments. 
Expression Atlas is currently divided into Bulk Expression Atlas and Single Cell Expression Atlas. Expression Atlas contains data from differential studies (microarray and bulk RNA-Seq) and baseline studies (bulk RNA-Seq and proteomics), whereas Single Cell Expression Atlas is currently dedicated to Single Cell RNA-Sequencing (scRNA-Seq) studies. The resource has been in continuous development since 2009 and it is available at https://www.ebi.ac.uk/gxa.},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Moreno, Pablo and Fexova, Silvie and George, Nancy and Manning, Jonathan R and Miao, Zhichiao and Mohammed, Suhaib and Muñoz-Pomer, Alfonso and Fullgrabe, Anja and Bi, Yalan and Bush, Natassja and Iqbal, Haider and Kumbham, Upendra and Solovyev, Andrey and Zhao, Lingyun and Prakash, Ananth and García-Seisdedos, David and Kundu, Deepti J and Wang, Shengbo and Walzer, Mathias and Clarke, Laura and Osumi-Sutherland, David and Tello-Ruiz, Marcela Karey and Kumari, Sunita and Ware, Doreen and Eliasova, Jana and Arends, Mark J and Nawijn, Martijn C and Meyer, Kerstin and Burdett, Tony and Marioni, John and Teichmann, Sarah and Vizcaíno, Juan Antonio and Brazma, Alvis and Papatheodorou, Irene},
  month = jan,
  year = {2022},
  pages = {D129--D140},
}

@article{cummins_european_2022,
  title = {The {European} {Nucleotide} {Archive} in 2021},
  volume = {50},
  issn = {0305-1048},
  url = {https://doi.org/10.1093/nar/gkab1051},
  doi = {10.1093/nar/gkab1051},
  abstract = {The European Nucleotide Archive (ENA, https://www.ebi.ac.uk/ena), maintained at the European Molecular Biology Laboratory's European Bioinformatics Institute (EMBL-EBI) provides freely accessible services, both for deposition of, and access to, open nucleotide sequencing data. Open scientific data are of paramount importance to the scientific community and contribute daily to the acceleration of scientific advance. 
Here, we outline the major updates to ENA’s services and infrastructure that have been delivered over the past year.},
  number = {D1},
  urldate = {2022-08-16},
  journal = {Nucleic Acids Research},
  author = {Cummins, Carla and Ahamed, Alisha and Aslam, Raheela and Burgin, Josephine and Devraj, Rajkumar and Edbali, Ossama and Gupta, Dipayan and Harrison, Peter W and Haseeb, Muhammad and Holt, Sam and Ibrahim, Talal and Ivanov, Eugene and Jayathilaka, Suran and Kadhirvelu, Vishnukumar and Kay, Simon and Kumar, Manish and Lathi, Ankur and Leinonen, Rasko and Madeira, Fabio and Madhusoodanan, Nandana and Mansurova, Milena and O’Cathail, Colman and Pearce, Matt and Pesant, Stéphane and Rahman, Nadim and Rajan, Jeena and Rinck, Gabriele and Selvakumar, Sandeep and Sokolov, Alexey and Suman, Swati and Thorne, Ross and Totoo, Prabhat and Vijayaraja, Senthilnathan and Waheed, Zahra and Zyoud, Ahmad and Lopez, Rodrigo and Burdett, Tony and Cochrane, Guy},
  month = jan,
  year = {2022},
  pages = {D106--D110},
}

@article{linden_resource_2013,
  title = {Resource {Entitlement} {Management} {System}},
  urldate = {2021-10-27},
  journal = {Trans-European Research and Education Networking Association},
  author = {Linden, Mikael and Nyrönen, Tommi and Lappalainen, Ilkka},
  year = {2013},
  keywords = {DAC, Data Access, software, tool},
}

@article{adil_single-cell_2021,
  title = {Single-{Cell} {Transcriptomics}: {Current} {Methods} and {Challenges} in {Data} {Acquisition} and {Analysis}},
  volume = {15},
  issn = {1662-453X},
  shorttitle = {Single-{Cell} {Transcriptomics}},
  url = {https://www.frontiersin.org/articles/10.3389/fnins.2021.591122},
  doi = {10.3389/fnins.2021.591122},
  abstract = {Rapid cost drops and advancements in next-generation sequencing have made profiling of cells at individual level a conventional practice in scientific laboratories worldwide. Single-cell transcriptomics [single-cell RNA sequencing (SC-RNA-seq)] has an immense potential of uncovering the novel basis of human life. 
The well-known heterogeneity of cells at the individual level can be better studied by single-cell transcriptomics. Proper downstream analysis of this data will provide new insights into the scientific communities. However, due to low starting materials, the SC-RNA-seq data face various computational challenges: normalization, differential gene expression analysis, dimensionality reduction, etc. Additionally, new methods like 10× Chromium can profile millions of cells in parallel, which creates a considerable amount of data. Thus, single-cell data handling is another big challenge. This paper reviews the single-cell sequencing methods, library preparation, and data generation. We highlight some of the main computational challenges that require to be addressed by introducing new bioinformatics algorithms and tools for analysis. We also show single-cell transcriptomics data as a big data problem.},
  urldate = {2022-07-29},
  journal = {Frontiers in Neuroscience},
  author = {Adil, Asif and Kumar, Vijay and Jan, Arif Tasleem and Asger, Mohammed},
  year = {2021},
  pages = {591122},
}

@article{majumder_united_2018,
  title = {United {States}: law and policy concerning transfer of genomic data to third countries},
  volume = {137},
  url = {http://dx.doi.org/10.1007/s00439-018-1917-9},
  doi = {10.1007/s00439-018-1917-9},
  abstract = {This paper provides an overview of US laws and related guidance documents affecting transfer of genomic data to third countries, addressing the domains of consent, privacy, security, compatible processing/adequacy, and oversight. In general, US laws governing research and disclosure and use of data generated within the health care system do not impose different requirements on transfers to researchers and service providers based in third countries compared with US-based researchers or service providers. Of note, the US lacks a comprehensive data protection regime. 
Data protections are piecemeal, spread across bodies of law that target specific kinds of research or data generated or held by specific kinds of actors involved in the delivery of health care. Oversight is also distributed across a range of bodies, including institutional review boards and data access committees. The conclusion to this paper examines future directions in US law and policy, including proposals for more comprehensive protections for personal data.},
  number = {8},
  urldate = {2021-10-18},
  journal = {Human Genetics},
  author = {Majumder, Mary Anderlik},
  month = aug,
  year = {2018},
  pages = {647--655},
}

@article{taylor_united_2018,
  title = {United {Kingdom}: transfers of genomic data to third countries},
  volume = {137},
  url = {http://dx.doi.org/10.1007/s00439-018-1921-0},
  doi = {10.1007/s00439-018-1921-0},
  abstract = {In the United Kingdom (UK), transfer of genomic data to third countries is regulated by data protection legislation. This is a composite of domestic and European Union (EU) law, with EU law to be adopted as domestic law when Brexit takes place. In this paper we consider the content of data protection legislation and the likely impact of Brexit on transfers of genomic data from the UK to other countries. We examine the advice by regulators not to rely upon consent as a lawful basis for processing under data protection law, at least not when personal data are used for research purposes, and consider some of the other ways in which the research context can qualify an individual's ability to exercise control over processing operations. We explain how the process of pseudonymization is to be understood in the context of transfer of genomic data to third parties, as well as how adequacy of data protection in a third country is to be determined in general terms. 
We conclude with reflections on the future direction of UK data protection law post Brexit with the reclassification of the UK itself as a third country.},
  number = {8},
  urldate = {2021-10-18},
  journal = {Human Genetics},
  author = {Taylor, M J and Wallace, S E and Prictor, M},
  month = aug,
  year = {2018},
  pages = {637--645},
}

@article{kim_south_2018,
  title = {South {Korea}: in the midst of a privacy reform centered on data sharing},
  volume = {137},
  url = {http://dx.doi.org/10.1007/s00439-018-1920-1},
  doi = {10.1007/s00439-018-1920-1},
  abstract = {With rapid developments in genomic and digital technologies, genomic data sharing has become a key issue for the achievement of precision medicine in South Korea. The legal and administrative framework for data sharing and protection in this country is currently under intense scrutiny from national and international stakeholders. Policymakers are assessing the relevance of specific restrictions in national laws and guidelines for better alignment with international approaches. This manuscript will consider key issues in international genome data sharing in South Korea, including consent, privacy, security measures, compatible adequacy and oversight, and map out an approach to genomic data sharing that recognizes the importance of patient engagement and responsible use of data in South Korea.},
  number = {8},
  urldate = {2021-10-18},
  journal = {Human Genetics},
  author = {Kim, Hannah and Kim, So Yoon and Joly, Yann},
  month = aug,
  year = {2018},
  pages = {627--635},
}

@article{dive_public_2020,
  title = {Public trust and global biobank networks},
  volume = {21},
  url = {http://dx.doi.org/10.1186/s12910-020-00515-0},
  doi = {10.1186/s12910-020-00515-0},
  abstract = {BACKGROUND: Biobanks provide an important foundation for genomic and personalised medicine. In order to enhance their scientific power and scope, they are increasingly becoming part of national or international networks. 
Public trust is essential in fostering public engagement, encouraging donation to, and facilitating public funding for biobanks. Globalisation and networking of biobanking may challenge this trust. METHODS: We report the results of an Australian study examining public attitudes to the networking and globalisation of biobanks. The study used quantitative and qualitative methods in conjunction with bioethical analysis in order to determine factors that may contribute to, and threaten, trust. RESULTS: Our results indicate a generally high level of trust in biobanks and in medical research more broadly. Key factors that can reduce perceived trustworthiness of biobanks are commercialisation and involvement in global networking. CONCLUSIONS: We conclude that robust ethical oversight and governance standards can both promote trust in global biobanking and ensure that this trust is warranted.},
  number = {1},
  urldate = {2021-10-18},
  journal = {BMC Medical Ethics},
  author = {Dive, Lisa and Critchley, Christine and Otlowski, Margaret and Mason, Paul and Wiersma, Miriam and Light, Edwina and Stewart, Cameron and Kerridge, Ian and Lipworth, Wendy},
  month = aug,
  year = {2020},
  keywords = {Biobanks, Commercialisation, Globalisation, Trust},
  pages = {73},
}

% Journal names are stored in their published capitalisation; titles carry
% no trailing full stop because the bibliography style adds punctuation.
@article{carroll_operationalizing_2021,
  title = {Operationalizing the {CARE} and {FAIR} {Principles} for {Indigenous} data futures},
  volume = {8},
  url = {https://doi.org/10.1038/s41597-021-00892-0},
  doi = {10.1038/s41597-021-00892-0},
  number = {1},
  urldate = {2021-10-18},
  journal = {Scientific Data},
  author = {Carroll, Stephanie Russo and Herczog, Edit and Hudson, Maui and Russell, Keith and Stall, Shelley},
  month = apr,
  year = {2021},
  pages = {108},
}

@article{knoppers_introduction_2018,
  title = {Introduction: the why and whither of genomic data sharing},
  volume = {137},
  url = {https://doi.org/10.1007/s00439-018-1923-y},
  doi = {10.1007/s00439-018-1923-y},
  number = {8},
  urldate = {2021-10-18},
  journal = {Human Genetics},
author = {Knoppers, B M and Joly, Yann},
  month = aug,
  year = {2018},
  pages = {569--574},
}

% Journal stored under its published name (no catalogue-style full stop);
% titles carry no trailing full stop because the style adds punctuation.
@article{kaye_data_2009,
  title = {Data sharing in genomics--re-shaping scientific practice},
  volume = {10},
  url = {https://doi.org/10.1038/nrg2573},
  doi = {10.1038/nrg2573},
  abstract = {Funding bodies have recently introduced a requirement that data sharing must be a consideration of all funding applications in genomics. As with all new developments this condition has had an impact on scientific practice, particularly in the area of publishing and in the conduct of research. We discuss the challenges that must be addressed if the full benefits of data sharing, as envisaged by funders, are to be realized.},
  number = {5},
  urldate = {2021-10-18},
  journal = {Nature Reviews Genetics},
  author = {Kaye, Jane and Heeney, Catherine and Hawkins, Naomi and de Vries, Jantina and Boddington, Paula},
  month = may,
  year = {2009},
  pages = {331--335},
}

@article{stark_australian_2019,
  title = {Australian {Genomics}: {A} {Federated} {Model} for {Integrating} {Genomics} into {Healthcare}},
  volume = {105},
  url = {https://doi.org/10.1016/j.ajhg.2019.06.003},
  doi = {10.1016/j.ajhg.2019.06.003},
  abstract = {Australian Genomics is a national collaborative research partnership of more than 80 organizations piloting a whole-of-system approach to integrating genomics into healthcare that is based on federation principles. The aim of Australian Genomics is to assess the application of genomic testing in healthcare at the translational interface between research and clinical delivery, with an emphasis on robust evaluation of outcomes.
It encompasses two bodies of work: a research program prospectively providing genomic testing through exemplar clinical projects in rare diseases, cancers, and reproductive carrier screening and interdependent programs for advancing the diagnostic, health informatics, regulatory, ethical, policy, and workforce infrastructure necessary for the integration of genomics into the Australian health system. Copyright © 2019. Published by Elsevier Inc.},
  number = {1},
  urldate = {2021-06-24},
  journal = {American Journal of Human Genetics},
  author = {Stark, Zornitza and Boughtwood, Tiffany and Phillips, Peta and Christodoulou, John and Hansen, David P and Braithwaite, Jeffrey and Newson, Ainsley J and Gaff, Clara L and Sinclair, Andrew H and North, Kathryn N},
  month = jul,
  year = {2019},
  pages = {7--14},
}

@techreport{queensland_genomics_blueprint_2020,
  title = {Blueprint for a {National} {Approach} to {Genomic} {Information} {Management} ({NAGIM})},
  url = {https://queenslandgenomics.org/capability-initiatives/national-approach-to-genomics-information-management/},
  urldate = {2022-01-06},
  institution = {Queensland Health},
  author = {{Queensland Genomics}},
  month = oct,
  year = {2020},
}

% Cell Genomics identifies papers by article number (the DOI suffix), so that
% number is recorded in the pages field; the ISSN is written with its hyphen.
@article{schatz_inverting_2022,
  title = {Inverting the model of genomics data sharing with the {NHGRI} {Genomic} {Data} {Science} {Analysis}, {Visualization}, and {Informatics} {Lab}-space},
  volume = {2},
  issn = {2666-979X},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S2666979X21001063},
  doi = {10.1016/j.xgen.2021.100085},
  abstract = {The NHGRI Genomic Data Science Analysis, Visualization, and Informatics Lab-space (AnVIL; https://anvilproject.org) was developed to address a widespread community need for a unified computing environment for genomics data storage, management, and analysis. In this perspective, we present AnVIL, describe its ecosystem and interoperability with other platforms, and highlight how this platform and associated initiatives contribute to improved genomic data sharing efforts. The AnVIL is a federated cloud platform designed to manage and store genomics and related data, enable population-scale analysis, and facilitate collaboration through the sharing of data, code, and analysis results. By inverting the traditional model of data sharing, the AnVIL eliminates the need for data movement while also adding security measures for active threat detection and monitoring and provides scalable, shared computing resources for any researcher. We describe the core data management and analysis components of the AnVIL, which currently consists of Terra, Gen3, Galaxy, RStudio/Bioconductor, Dockstore, and Jupyter, and describe several flagship genomics datasets available within the AnVIL. We continue to extend and innovate the AnVIL ecosystem by implementing new capabilities, including mechanisms for interoperability and responsible data sharing, while streamlining access management. The AnVIL opens many new opportunities for analysis, collaboration, and data sharing that are needed to drive research and to make discoveries through the joint analysis of hundreds of thousands to millions of genomes along with associated clinical and molecular data types.},
  number = {1},
  urldate = {2022-01-14},
  journal = {Cell Genomics},
  author = {Schatz, Michael C and Philippakis, Anthony A and Afgan, Enis and Banks, Eric and Carey, Vincent J and Carroll, Robert J and Culotti, Alessandro and Ellrott, Kyle and Goecks, Jeremy and Grossman, Robert L and Hall, Ira M and Hansen, Kasper D and Lawson, Jonathan and Leek, Jeffrey T and Luria, Anne O'Donnell and Mosher, Stephen and Morgan, Martin and Nekrutenko, Anton and O'Connor, Brian D and Osborn, Kevin and Paten, Benedict and Patterson, Candace and Tan, Frederick J and Taylor, Casey Overby and Vessio, Jennifer and Waldron, Levi and Wang, Ting and Wuichet, Kristin},
  month = jan,
  year = {2022},
  pages = {100085},
}

@article{anagnostou_emerging_2021,
  title = {The emerging complexity of {Open} {Science}: assessing {Intelligent} {Data} {Openness} in {Genomic} {Anthropology} and {Human} {Genomics}},
  volume = {99},
  url = {https://doi.org/10.4436/JASS.99016},
  doi = {10.4436/JASS.99016},
  abstract = {In recent decades, the scientific community has become aware of the importance of science being effectively open in order to speed up scientific and technological progress. In this context, the achievement of a robust, effective and responsible form of data sharing is now widely acknowledged as a fundamental part of the research process. The production and resolution of human genomic data has steadily increased in recent years, mainly due to technological advances and decreasing costs of DNA genotyping and sequencing. There is, however, a downside to this process due to the huge increase in the complexity of the data and related metadata.
This means it is advisable to go beyond traditional forms of sharing analysis, which have focused on data availability only. Here we present a pilot study that aims to complement a survey on the availability of data related to peer-reviewed publications with an analysis of their findability, accessibility, useability and assessability (according to the "intelligent data openness" scheme). Sharing rates in genomic anthropology (73.0\%) were found to be higher than human genomics (32.4\%), but lower than closely related research fields (from 96.8\% to 79.2\% for paleogenetics and evolutionary genetics, respectively). We discuss the privacy and methodological issues that could be linked to this finding. Comparisons of sharing rates across a wide range of disciplines has suggested that the idea of human genomics as a forerunner for the open data movement should be questioned. Finally, both in genomic anthropology and human genomics, findability and useability were found to be compliant with the expectations of an intelligent data openness, whereas only a minor part of studies met the need to make the data completely assessable.},
  urldate = {2022-01-05},
  journal = {Journal of Anthropological Sciences},
  author = {Anagnostou, Paolo and Capocasa, Marco and Brisighelli, Francesca and Battaggia, Cinzia and Destro Bisol, Giovanni},
  month = dec,
  year = {2021},
}

% The two EGA federation documents are not journal papers (neither has a
% journal), so they are typed as misc rather than article; this avoids the
% missing-journal warning and a malformed journal-style reference.
@misc{kerry_ega_2020,
  title = {{EGA} {Federated} {Node} {Operations}},
  url = {https://ega-archive.org/files/EGA-Node-Operations-v1.pdf},
  urldate = {2022-01-05},
  author = {Kerry, Giselle and Keane, Thomas and Rambla, Jordi and Spalding, Dylan and Flicek, Paul and Navarro, Arcadi and Parkinson, Helen},
  year = {2020},
}

@misc{keane_ega_2020,
  title = {{EGA} {Federation}: {Structure} and organisation},
  urldate = {2022-01-05},
  author = {Keane, Thomas and Rambla, Jordi and Spalding, Dylan and Flicek, Paul and Navarro, Arcadi and Parkinson, Helen},
  year = {2020},
}

@article{chen_genome_2021,
  title = {The genome sequence archive family: toward explosive data growth and diverse data types},
  url = {https://doi.org/10.1016/j.gpb.2021.08.001},
  doi = {10.1016/j.gpb.2021.08.001},
  abstract = {The Genome Sequence Archive (GSA) is a data repository for archiving raw sequence data, which provides data storage and sharing services for worldwide scientific communities. Considering explosive data growth with diverse data types, here we present the GSA family by expanding into a set of resources for raw data archive with different purposes, namely, GSA (https://ngdc.cncb.ac.cn/gsa/), GSA for Human (GSA-Human, https://ngdc.cncb.ac.cn/gsa-human/), and Open Archive for Miscellaneous Data (OMIX, https://ngdc.cncb.ac.cn/omix/). Compared with the 2017 version, GSA has been significantly updated in data model, online functionalities, and web interfaces. GSA-Human, as a new partner of GSA, is a data repository specialized in human genetics-related data with controlled access and security. OMIX, as a critical complement to the two resources mentioned above, is an open archive for miscellaneous data. Together, all these resources form a family of resources dedicated to archiving explosive data with diverse types, accepting data submissions from all over the world, and providing free open access to all publicly available data in support of worldwide research activities. Copyright © 2021.
Published by Elsevier B.V.},
  urldate = {2022-01-04},
  journal = {Genomics, Proteomics \& Bioinformatics},
  author = {Chen, Tingting and Chen, Xu and Zhang, Sisi and Zhu, Junwei and Tang, Bixia and Wang, Anke and Dong, Lili and Zhang, Zhewen and Yu, Caixia and Sun, Yanling and Chi, Lianjiang and Chen, Huanxin and Zhai, Shuang and Sun, Yubin and Lan, Li and Zhang, Xin and Xiao, Jingfa and Bao, Yiming and Wang, Yanqing and Zhang, Zhang and Zhao, Wenming},
  month = aug,
  year = {2021},
}

% The initiative name and its acronym are brace-protected so sentence-casing
% styles keep the capitalisation used in the abstract (mCODE).
@article{osterman_improving_2020,
  title = {Improving cancer data interoperability: the promise of the {Minimal} {Common} {Oncology} {Data} {Elements} ({mCODE}) initiative},
  volume = {4},
  url = {https://doi.org/10.1200/CCI.20.00059},
  doi = {10.1200/CCI.20.00059},
  abstract = {PURPOSE: Because of expanding interoperability requirements, structured patient data are increasingly available in electronic health records. Many oncology data elements (eg, staging, biomarkers, documentation of adverse events and cancer outcomes) remain challenging. The Minimal Common Oncology Data Elements (mCODE) project is a consensus data standard created to facilitate transmission of data of patients with cancer. METHODS: In 2018, mCODE was developed through a work group convened by ASCO, including oncologists, informaticians, researchers, and experts in terminologies and standards. The mCODE specification is organized by 6 high-level domains: patient, laboratory/vital, disease, genomics, treatment, and outcome. In total, 23 mCODE profiles are composed of 90 data elements. RESULTS: A conceptual model was published for public comment in January 2019 and, after additional refinement, the first public version of the mCODE (version 0.9.1) Fast Healthcare Interoperability Resources (FHIR) implementation guide (IG) was presented at the ASCO Annual Meeting in June 2019. The specification was approved for balloting by Health Level 7 International (HL7) in August 2019. mCODE passed the HL7 ballot in September 2019 with 86.5\% approval. The mCODE IG authors worked with HL7 reviewers to resolve all negative comments, leading to a modest expansion in the number of data elements and tighter alignment with FHIR and other HL7 conventions. The mCODE version 1.0 FHIR IG Standard for Trial Use was formally published on March 18, 2020. CONCLUSION: The mCODE project has the potential to offer tremendous benefits to cancer care delivery and research by creating an infrastructure to better share patient data. mCODE is available free from www.mCODEinitiative.org. Pilot implementations are underway, and a robust community of stakeholders has been assembled across the oncology ecosystem.},
  urldate = {2021-11-18},
  journal = {JCO Clinical Cancer Informatics},
  author = {Osterman, Travis J and Terry, May and Miller, Robert S},
  month = oct,
  year = {2020},
  pages = {993--1001},
}

@article{cabili_empirical_2021,
  title = {Empirical validation of an automated approach to data use oversight},
  volume = {1},
  issn = {2666-979X},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S2666979X21000380},
  doi = {10.1016/j.xgen.2021.100031},
  number = {2},
  urldate = {2021-11-28},
  journal = {Cell Genomics},
  author = {Cabili, Moran N. and Lawson, Jonathan and Saltzman, Andrea and Rushton, Greg and O’Rourke, Pearl and Wilbanks, John and Rodriguez, Laura Lyman and Nyronen, Tommi and Courtot, Mélanie and Donnelly, Stacey and Philippakis, Anthony A.},
  month = nov,
  year = {2021},
  pages = {100031},
}

@article{freeberg_european_2022,
  title = {The {European} {Genome}-phenome {Archive} in 2021},
  volume = {50},
  url = {https://doi.org/10.1093/nar/gkab1059},
  doi = {10.1093/nar/gkab1059},
  abstract = {The European Genome-phenome Archive (EGA - https://ega-archive.org/) is a resource for long term secure archiving of all types of potentially identifiable genetic, phenotypic, and clinical data resulting from biomedical research projects.
Its mission is to foster hosted data reuse, enable reproducibility, and accelerate biomedical and translational research in line with the FAIR principles. Launched in 2008, the EGA has grown quickly, currently archiving over 4,500 studies from nearly one thousand institutions. The EGA operates a distributed data access model in which requests are made to the data controller, not to the EGA, therefore, the submitter keeps control on who has access to the data and under which conditions. Given the size and value of data hosted, the EGA is constantly improving its value chain, that is, how the EGA can contribute to enhancing the value of human health data by facilitating its submission, discovery, access, and distribution, as well as leading the design and implementation of standards and methods necessary to deliver the value chain. The EGA has become a key GA4GH Driver Project, leading multiple development efforts and implementing new standards and tools, and has been appointed as an ELIXIR Core Data Resource. © The Author(s) 2021. Published by Oxford University Press on behalf of Nucleic Acids Research.},
  number = {D1},
  urldate = {2021-11-24},
  journal = {Nucleic Acids Research},
  author = {Freeberg, Mallory Ann and Fromont, Lauren A and D'Altri, Teresa and Romero, Anna Foix and Ciges, Jorge Izquierdo and Jene, Aina and Kerry, Giselle and Moldes, Mauricio and Ariosa, Roberto and Bahena, Silvia and Barrowdale, Daniel and Barbero, Marcos Casado and Fernandez-Orth, Dietmar and Garcia-Linares, Carles and Garcia-Rios, Emilio and Haziza, Frédéric and Juhasz, Bela and Llobet, Oscar Martinez and Milla, Gemma and Mohan, Anand and Rueda, Manuel and Sankar, Aravind and Shaju, Dona and Shimpi, Ashutosh and Singh, Babita and Thomas, Coline and de la Torre, Sabela and Uyan, Umuthan and Vasallo, Claudia and Flicek, Paul and Guigo, Roderic and Navarro, Arcadi and Parkinson, Helen and Keane, Thomas and Rambla, Jordi},
  month = jan,
  year = {2022},
  pages = {D980--D987},
}

% The quoted phrase in the title uses TeX quote ligatures so it typesets with
% proper opening and closing quotation marks.
@article{woolley_responsible_2018,
  title = {Responsible sharing of biomedical data and biospecimens via the ``{Automatable} {Discovery} and {Access} {Matrix}'' ({ADA}-{M})},
  volume = {3},
  url = {https://doi.org/10.1038/s41525-018-0057-4},
  doi = {10.1038/s41525-018-0057-4},
  abstract = {Given the data-rich nature of modern biomedical research, there is a pressing need for a systematic, structured, computer-readable way to capture, communicate, and manage sharing rules that apply to biomedical resources. This is essential for responsible recording, versioning, communication, querying, and actioning of resource sharing plans. However, lack of a common "information model" for rules and conditions that govern the sharing of materials, methods, software, data, and knowledge creates a fundamental barrier. Without this, it can be virtually impossible for Research Ethics Committees (RECs), Institutional Review Boards (IRBs), Data Access Committees (DACs), biobanks, and end users to confidently track, manage, and interpret applicable legal and ethical requirements. This raises costs and burdens of data stewardship and decreases efficient and responsible access to data, biospecimens, and other resources. To address this, the GA4GH and IRDiRC organizations sponsored the creation of the Automatable Discovery and Access Matrix (ADA-M, read simply as "Adam"). ADA-M is a comprehensive information model that provides the basis for producing structured metadata "Profiles" of regulatory conditions, thereby enabling efficient application of those conditions across regulatory spheres. Widespread use of ADA-M will aid researchers in globally searching and prescreening potential data and/or biospecimen resources for compatibility with their research plans in a responsible and efficient manner, increasing likelihood of timely DAC approvals while also significantly reducing time and effort DACs, RECs, and IRBs spend evaluating resource requests and research proposals. Extensive online documentation, software support, video guides, and an Application Programming Interface (API) for ADA-M have been made available.},
  urldate = {2021-11-15},
  journal = {npj Genomic Medicine},
  author = {Woolley, J Patrick and Kirby, Emily and Leslie, Josh and Jeanson, Francis and Cabili, Moran N and Rushton, Gregory and Hazard, James G and Ladas, Vagelis and Veal, Colin D and Gibson, Spencer J and Tassé, Anne-Marie and Dyke, Stephanie O M and Gaff, Clara and Thorogood, Adrian and Knoppers, Bartha Maria and Wilbanks, John and Brookes, Anthony J},
  month = jul,
  year = {2018},
  pages = {17},
}

@article{anderson_standardized_2013,
  title = {Standardized cardiovascular data for clinical research, registries, and patient care: a report from the {Data} {Standards} {Workgroup} of the {National} {Cardiovascular} {Research} {Infrastructure} project},
  volume = {61},
  url = {https://doi.org/10.1016/j.jacc.2012.12.047},
  doi = {10.1016/j.jacc.2012.12.047},
  abstract = {Relatively little attention has been focused on standardization of data exchange in clinical research studies and patient care activities. Both are usually managed locally using separate and generally incompatible data systems at individual hospitals or clinics. In the past decade there have been nascent efforts to create data standards for clinical research and patient care data, and to some extent these are helpful in providing a degree of uniformity. Nonetheless, these data standards generally have not been converted into accepted computer-based language structures that could permit reliable data exchange across computer networks. The National Cardiovascular Research Infrastructure (NCRI) project was initiated with a major objective of creating a model framework for standard data exchange in all clinical research, clinical registry, and patient care environments, including all electronic health records. The goal is complete syntactic and semantic interoperability.
A Data Standards Workgroup was established to create or identify and then harmonize clinical definitions for a base set of standardized cardiovascular data elements that could be used in this network infrastructure. Recognizing the need for continuity with prior efforts, the Workgroup examined existing data standards sources. A basic set of 353 elements was selected. The NCRI staff then collaborated with the 2 major technical standards organizations in health care, the Clinical Data Interchange Standards Consortium and Health Level Seven International, as well as with staff from the National Cancer Institute Enterprise Vocabulary Services. Modeling and mapping were performed to represent (instantiate) the data elements in appropriate technical computer language structures for endorsement as an accepted data standard for public access and use. Fully implemented, these elements will facilitate clinical research, registry reporting, administrative reporting and regulatory compliance, and patient care. Copyright © 2013 American College of Cardiology Foundation. Published by Elsevier Inc. All rights reserved.},
  number = {18},
  urldate = {2021-11-08},
  journal = {Journal of the American College of Cardiology},
  author = {Anderson, H Vernon and Weintraub, William S and Radford, Martha J and Kremers, Mark S and Roe, Matthew T and Shaw, Richard E and Pinchotti, Dana M and Tcheng, James E},
  month = may,
  year = {2013},
  pages = {1835--1846},
}

@article{williams_health_2010,
  title = {Health behaviours, socioeconomic status and diabetes incidence: the {Australian} {Diabetes} {Obesity} and {Lifestyle} {Study} ({AusDiab})},
  volume = {53},
  url = {https://doi.org/10.1007/s00125-010-1888-4},
  doi = {10.1007/s00125-010-1888-4},
  abstract = {AIMS/HYPOTHESIS: To identify the impact of socioeconomic status on incident impaired glucose metabolism and type 2 diabetes and to investigate the mediating role of health behaviours on this relationship using national, population-based data. METHODS: The Australian Diabetes Obesity and Lifestyle (AusDiab) Study is a national, population-based, longitudinal study of adults aged 25 years and above. A total sample of 4,405 people provided complete baseline (1999-2000) and 5 year follow-up (2004-2005) data relevant for these analyses. Fasting plasma glucose and 2 h plasma glucose were obtained from an OGTT, and demographic, socioeconomic and behavioural data were collected by interview and questionnaire. Multinomial logistic regression examined the role of socioeconomic position in the development of diabetes and mediation analyses tested the contribution of health behaviours in this relationship. RESULTS: Highest level of education was a stronger predictor of incident impaired glucose tolerance and type 2 diabetes (p = 0.002), compared with household income (p = 0.103), and occupational grade (p = 0.202). Education remained a significant independent predictor of diabetes in fully adjusted models. However, the relationship was attenuated by the health behaviours (smoking and physical activity). Mediation analyses indicated that these behaviours were partial mediators (explaining 27\%) of the socioeconomic status-diabetes relationship. CONCLUSION/INTERPRETATION: Smoking and physical activity partly mediate the relationship between low education and type 2 diabetes. Identification of these modifiable behavioural mediators should facilitate the development of effective health promotion campaigns to target those at high risk of developing type 2 diabetes.},
  number = {12},
  urldate = {2021-11-11},
  journal = {Diabetologia},
  author = {Williams, E D and Tapp, R J and Magliano, D J and Shaw, J E and Zimmet, P Z and Oldenburg, B F},
  month = dec,
  year = {2010},
  pages = {2538--2545},
}

% This record is a dataset deposited on Zenodo, not a journal paper, so it is
% typed as misc and the repository is given in howpublished instead of journal.
@misc{jin_cineca_2021,
  title = {{CINECA} synthetic cohort {NA} {Canada} {CHILD} [{CC}-{BY}-{NC}-{SA}]},
  url = {https://zenodo.org/record/5122832},
  doi = {10.5281/zenodo.5122832},
  abstract = {The "CINECA synthetic cohort NA Canada CHILD" dataset is a synthetic dataset developed to provide insight into how data is structured for select common attributes in the CHILD Cohort Study, but not reveal any personal or identifiable information associated with cohort participants. Such synthetic datasets are valuable for software developers to be able to see specific examples of data for common attributes (i.e. a minimal metadata model of a selection of common variables usually present in cohorts). This dataset comprises 100 variables for 150 synthetic participants which have faked phenotypic data that reflects CHILD cohort data. In addition, there is genetic data based on the 1000 Genomes project. This dataset was created within the context of the CINECA project. More information about the creation of this dataset can be found in the included documentation. Please note this preamble must be included with any distribution of this dataset: This synthetic dataset (with cohort “participants” / ”subjects” marked with FAKE) has no identifiable data and cannot be used to make any inference about CHILD cohort data or results. The purpose of this dataset is to aid development of technical implementations for cohort data discovery, harmonization, access, and federated analysis. In support of FAIRness in data sharing, this dataset is made freely available under the Creative Commons Licence (CC-BY; https://creativecommons.org/licenses/by-nc-sa/4.0/). Please ensure this preamble is included with this dataset and that the CHILD project and the CINECA project (funding: EC H2020 grant 825775 and CIHR grant 404896) are acknowledged. If you have any questions about this dataset contact Fiona Brinkman at brinkman@sfu.ca or Erin Gill at egill@sfu.ca. CINECA synthetic cohorts CINECA synthetic cohort Africa H3ABioNet CINECA synthetic cohort Europe CH SIB CINECA synthetic cohort Europe UK1},
  urldate = {2021-10-19},
  howpublished = {Zenodo},
  author = {Jin, Vivian and Cook, Justin and Dubeau, Aimée and Dai, Ruixue and Freitas, Tyler and Dursi, Jonathan and Gill, Erin and Winsor, Geoff and Courtot, Melanie and Subbarao, Padmaja and Brinkman, Fiona},
  year = {2021},
}

% The platform acronym is brace-protected with the capitalisation the abstract
% uses (CanDIG), so sentence-casing styles cannot flatten it.
@article{dursi_candig_2021,
  title = {{CanDIG}: secure federated genomic queries and analyses across jurisdictions},
  url = {http://biorxiv.org/lookup/doi/10.1101/2021.03.30.434101},
  doi = {10.1101/2021.03.30.434101},
  abstract = {Rapid expansions of bioinformatics and computational biology have broadened the collection and use of -omics data including genomic, transcriptomic, methylomic and a myriad of other health data types, in the clinic and the laboratory. Both clinical and research uses of such data require co-analysis with large datasets, for which participant privacy and the need for data custodian controls must remain paramount. This is particularly challenging in multi-jurisdictional settings, such as Canada, where health privacy and security requirements are often heterogeneous. Data federation presents a solution to this, allowing for integration and analysis of large datasets from various sites while abiding by local policies.
The Canadian Distributed Infrastructure for Genomics platform (CanDIG) enables federated querying and analysis of -omics and health data while keeping that data local and under local control. It builds upon existing infrastructures to connect five health and research institutions across Canada, relies heavily on standards and tooling brought together by the Global Alliance for Genomics and Health (GA4GH), implements a clear division of responsibilities among its participants and adheres to international data sharing standards. Participating researchers and clinicians can therefore contribute to and quickly access a critical mass of -omics data across a national network in a manner that takes into account the multi-jurisdictional nature of our privacy and security policies. Through this, CanDIG gives medical and research communities the tools needed to use and analyze the ever-growing amount of -omics data available to them in order to improve our understanding and treatment of various conditions and diseases. CanDIG is being used to make genomic and phenotypic data available for querying across Canada as part of data sharing for five leading pan-Canadian projects including the Terry Fox Comprehensive Cancer Care Centre Consortium Network (TF4CN) and Terry Fox PRecision Oncology For Young peopLE (PROFYLE), and making data from provincial projects such as POG (Personalized Onco- Genomics) more widely available.},
  urldate = {2021-10-19},
  journal = {bioRxiv},
  author = {Dursi, L. Jonathan and Bozoky, Zoltan and de Borja, Richard and Li, Jimmy and Bujold, David and Lipski, Adam and Rashid, Shaikh Farhan and Sethi, Amanjeev and Memon, Neelam and Naidoo, Dashaylan and Coral-Sasso, Felipe and Wong, Matthew and Quirion, P-O and Lu, Zhibin and Agarwal, Samarth and Pavlov, Kat and Ponomarev, Andrew and Husic, Mia and Pace, Krista and Palmer, Samantha L. and Grover, Stephanie A. and Hakgor, Sevan and Siu, Lillian L. and Malkin, David and Virtanen, Carl and Pugh, Trevor J. and Jacques, Pierre-Étienne and Joly, Yann and Jones, Steven J. M. and Bourque, Guillaume and Brudno, Michael},
  month = mar,
  year = {2021},
}

% This record is a project deliverable deposited on Zenodo, not a journal
% paper, so it is typed as misc with the repository given in howpublished.
@misc{jin_cineca_2020,
  title = {{CINECA} {Cohort} {Level} metadata {Representation} {D3}.1},
  url = {https://zenodo.org/record/4575460},
  doi = {10.5281/zenodo.4575460},
  abstract = {To support human cohort genomic and other “omic” data discovery and analysis across jurisdictions, basic data such as cohort participant age, sex, etc needs to be harmonised. Developing a key “minimal metadata model” of these basic attributes which should be recorded with all cohorts is critical to aid initial querying across jurisdictions for suitable dataset discovery. We describe here the creation of a minimal metadata model, the specific methods used to create the minimal metadata model, and this model’s utility and impact. A first version of the metadata model was built based on a review of Maelstrom research data standards and a manual survey of cohort data dictionaries, which identified and incorporated overlapping core variables across CINECA cohorts. The model was then converted to Genomics Cohorts Knowledge Ontology (GECKO) format and further expanded with additional terms. The minimal metadata model is being made broadly available to aid any project or projects, including those outside of CINECA interested in facilitating cross-jurisdictional data discovery and analysis.},
  urldate = {2021-10-19},
  howpublished = {Zenodo},
  author = {Jin, Vivian and Brinkman, Fiona},
  year = {2020},
}

@inproceedings{fernandez-orth_european_2019,
  title = {European {Genome}-{Phenome} {Archive} ({EGA}) - {Granular} {Solutions} for the {Next} 10 {Years}},
  isbn = {978-1-72812-286-1},
  url = {https://ieeexplore.ieee.org/document/8787508/},
  doi = {10.1109/CBMS.2019.00011},
  urldate = {2021-10-18},
  booktitle = {2019 {IEEE} 32nd {International} {Symposium} on {Computer}-{Based} {Medical} {Systems} ({CBMS})},
  publisher = {IEEE},
  author = {Fernandez-Orth, Dietmar and Lloret-Villas, Audald and Rambla de Argila, Jordi},
  month = jun,
  year = {2019},
  pages = {4--6},
}

@article{hekel_privacy-preserving_2021,
  title = {Privacy-preserving storage of sequenced genomic data},
  volume = {22},
  url = {https://doi.org/10.1186/s12864-021-07996-2},
  doi = {10.1186/s12864-021-07996-2},
  abstract = {BACKGROUND: The current and future applications of genomic data may raise ethical and privacy concerns. Processing and storing of this data introduce a risk of abuse by potential offenders since the human genome contains sensitive personal information. For this reason, we have developed a privacy-preserving method, named Varlock providing secure storage of sequenced genomic data. We used a public set of population allele frequencies to mask the personal alleles detected in genomic reads. Each personal allele described by the public set is masked by a randomly selected population allele with respect to its frequency. Masked alleles are preserved in an encrypted confidential file that can be shared in whole or in part using public-key cryptography. RESULTS: Our method masked the personal variants and introduced new variants detected in a personal masked genome.
Alternative alleles with lower population frequency were masked and introduced more often. We performed a joint PCA analysis of personal and masked VCFs, showing that the VCFs between the two groups cannot be trivially mapped. Moreover, the method is reversible and personal alleles in specific genomic regions can be unmasked on demand. CONCLUSION: Our method masks personal alleles within genomic reads while preserving valuable non-sensitive properties of sequenced DNA fragments for further research. Personal alleles in the desired genomic regions may be restored and shared with patients, clinics, and researchers. We suggest that the method can provide an additional security layer for storing and sharing of the raw aligned reads. © 2021. The Author(s).}, number = {1}, urldate = {2021-10-19}, journal = {BMC Genomics}, author = {Hekel, Rastislav and Budis, Jaroslav and Kucharik, Marcel and Radvanszky, Jan and Pös, Zuzana and Szemes, Tomas}, month = oct, year = {2021}, pages = {712}, } @article{grossman_case_2016, title = {A case for data commons: toward data science as a service}, volume = {18}, issn = {1521-9615}, url = {http://ieeexplore.ieee.org/document/7548983/}, doi = {10.1109/MCSE.2016.92}, abstract = {Data commons collocate data, storage, and computing infrastructure with core services and commonly used tools and applications for managing, analyzing, and sharing data to create an interoperable resource for the research community. An architecture for data commons is described, as well as some lessons learned from operating several large-scale data commons.}, number = {5}, urldate = {2022-05-12}, journal = {Computing in Science \& Engineering}, author = {Grossman, Robert L. 
and Heath, Allison and Murphy, Mark and Patterson, Maria and Wells, Walt}, month = sep, year = {2016}, keywords = {Data commons, Data sharing}, pages = {10--20}, } @article{hughes_harmonization_2019, title = {Harmonization of clinical data across {Gen3} data commons}, volume = {37}, issn = {0732-183X}, url = {http://ascopubs.org/doi/10.1200/JCO.2019.37.15_suppl.e18094}, doi = {10.1200/JCO.2019.37.15_suppl.e18094}, abstract = {e18094 Background: Gen3 is an open source software platform for developing and operating data commons. Gen3 systems are now used by a variety of institutions and agencies to share and analyze large biomedical datasets including clinical and genomic data. One of the challenges of working with these datasets is disparate clinical data standards used by researchers across different studies and fields. We have worked to address these hurdles in a variety of ways. Methods: Gen3 is an open source software platform for developing and operating data commons. Detailed specification and features can be found at https://gen3.org/ with code located on GitHub ( https://github.com/UC-cdis ). Results: The Gen3 data model is a graphical representation of the different nodes or classes of data that have been collected. Examples include diagnosis, demographic, exposure, and family history. The properties and values on each node are controlled by the data dictionary specified by the data commons creator. While each commons may have a unique data model and dictionary, specifying external standards allows for easier submission of new data and assists data consumers with interpretation of results. A variety of external references can be supported, but here we demonstrate the use of the National Cancer Institute Thesaurus (NCIt). NCIt provides reference terminologies and biomedical standards that contain a rich set of terms, codes, definitions, and concepts. Using the same reference standards across commons allows for the export of clinical data between commons. 
The Portable Format for Biomedical Data (PFB) was created to facilitate data export and to allow the data dictionary schema as well as the raw data to be compressed and exported. This new file format, which utilizes an Avro serialization, is small, fast, easy to modify, and enables simple data export and import. PFB also has the ability to house entire external reference ontologies and it is easy to update the PFB references as changes are introduced. Conclusions: We have shown here how the Gen3 data model, use of external reference standards for clinical data, and the export/import format of PFB enable the harmonization of clinical data across different data commons.}, number = {15\_suppl}, urldate = {2021-10-19}, journal = {Journal of Clinical Oncology}, author = {Hughes, LaRon and Grossman, Robert L. and Flamig, Zachary and Prokhorenkov, Andrew and Lukowski, Michael and Fitzsimons, Michael and Lichtenberg, Tara and Tang, Yajing}, month = may, year = {2019}, pages = {e18094}, } @article{tanjo_practical_2021, title = {Practical guide for managing large-scale human genome data in research}, volume = {66}, issn = {1434-5161}, url = {http://www.nature.com/articles/s10038-020-00862-1}, doi = {10.1038/s10038-020-00862-1}, abstract = {Studies in human genetics deal with a plethora of human genome sequencing data that are generated from specimens as well as available on public domains. With the development of various bioinformatics applications, maintaining the productivity of research, managing human genome data, and analyzing downstream data is essential. This review aims to guide struggling researchers to process and analyze these large-scale genomic data to extract relevant information for improved downstream analyses. Here, we discuss worldwide human genome projects that could be integrated into any data for improved analysis. 
Obtaining human whole-genome sequencing data from both data stores and processes is costly; therefore, we focus on the development of data format and software that manipulate whole-genome sequencing. Once the sequencing is complete and its format and data processing tools are selected, a computational platform is required. For the platform, we describe a multi-cloud strategy that balances between cost, performance, and customizability. A good quality published research relies on data reproducibility to ensure quality results, reusability for applications to other datasets, as well as scalability for the future increase of datasets. To solve these, we describe several key technologies developed in computer science, including workflow engine. We also discuss the ethical guidelines inevitable for human genomic data analysis that differ from model organisms. Finally, the future ideal perspective of data processing and analysis is summarized.}, number = {1}, urldate = {2021-10-19}, journal = {Journal of Human Genetics}, author = {Tanjo, Tomoya and Kawai, Yosuke and Tokunaga, Katsushi and Ogasawara, Osamu and Nagasaki, Masao}, year = {2021}, pages = {39--52}, } @article{vernon_metabolic_2021, title = {Metabolic {Signatures} in {Coronary} {Artery} {Disease}: {Results} from the {BioHEART}-{CT} {Study}}, volume = {10}, url = {http://dx.doi.org/10.3390/cells10050980}, doi = {10.3390/cells10050980}, abstract = {Despite effective prevention programs targeting cardiovascular risk factors, coronary artery disease (CAD) remains the leading cause of death. Novel biomarkers are needed for improved risk stratification and primary prevention. To assess for independent associations between plasma metabolites and specific CAD plaque phenotypes we performed liquid chromatography mass-spectrometry on plasma from 1002 patients in the BioHEART-CT study. Four metabolites were examined as candidate biomarkers. 
Dimethylguanidino valerate (DMGV) was associated with presence and amount of CAD (OR) 1.41 (95\% Confidence Interval [CI] 1.12-1.79, p = 0.004), calcified plaque, and obstructive CAD (p {\textless} 0.05 for both). The association with amount of plaque remained after adjustment for traditional risk factors, $\beta$-coefficient 0.17 (95\% CI 0.02-0.32, p = 0.026). Glutamate was associated with the presence of non-calcified plaque, OR 1.48 (95\% CI 1.09-2.01, p = 0.011). Phenylalanine was associated with amount of CAD, $\beta$-coefficient 0.33 (95\% CI 0.04-0.62, p = 0.025), amount of calcified plaque, ($\beta$-coefficient 0.88, 95\% CI 0.23-1.53, p = 0.008), and obstructive CAD, OR 1.84 (95\% CI 1.01-3.31, p = 0.046). Trimethylamine N-oxide was negatively associated non-calcified plaque OR 0.72 (95\% CI 0.53-0.97, p = 0.029) and the association remained when adjusted for traditional risk factors. In targeted metabolomic analyses including 53 known metabolites and controlling for a 5\% false discovery rate, DMGV was strongly associated with the presence of calcified plaque, OR 1.59 (95\% CI 1.26-2.01, p = 0.006), obstructive CAD, OR 2.33 (95\% CI 1.59-3.43, p = 0.0009), and amount of CAD, $\beta$-coefficient 0.3 (95\% CI 0.14-0.45, p = 0.014). In multivariate analyses the lipid and nucleotide metabolic pathways were both associated with the presence of CAD, after adjustment for traditional risk factors. We report novel associations between CAD plaque phenotypes and four metabolites previously associated with CAD. We also identified two metabolic pathways strongly associated with CAD, independent of traditional risk factors. 
These pathways warrant further investigation at both a biomarker and mechanistic level.}, number = {5}, urldate = {2021-11-11}, journal = {Cells}, author = {Vernon, Stephen T and Tang, Owen and Kim, Taiyun and Chan, Adam S and Kott, Katharine A and Park, John and Hansen, Thomas and Koay, Yen C and Grieve, Stuart M and O'Sullivan, John F and Yang, Jean Y and Figtree, Gemma A}, month = apr, year = {2021}, pages = {980}, } @article{linden_common_2018, title = {Common {ELIXIR} service for researcher authentication and authorisation}, volume = {7}, url = {http://dx.doi.org/10.12688/f1000research.15161.1}, doi = {10.12688/f1000research.15161.1}, abstract = {A common Authentication and Authorisation Infrastructure (AAI) that would allow single sign-on to services has been identified as a key enabler for European bioinformatics. ELIXIR AAI is an ELIXIR service portfolio for authenticating researchers to ELIXIR services and assisting these services on user privileges during research usage. It relieves the scientific service providers from managing the user identities and authorisation themselves, enables the researcher to have a single set of credentials to all ELIXIR services and supports meeting the requirements imposed by the data protection laws. ELIXIR AAI was launched in late 2016 and is part of the ELIXIR Compute platform portfolio. By the end of 2017 the number of users reached 1000, while the number of relying scientific services was 36. 
This paper presents the requirements and design of the ELIXIR AAI and the policies related to its use, and how it can be used for serving some example services, such as document management, social media, data discovery, human data access, cloud compute and training services.}, urldate = {2021-10-19}, journal = {F1000Research}, author = {Linden, Mikael and Prochazka, Michal and Lappalainen, Ilkka and Bucik, Dominik and Vyskocil, Pavel and Kuba, Martin and Silén, Sami and Belmann, Peter and Sczyrba, Alexander and Newhouse, Steven and Matyska, Ludek and Nyrönen, Tommi}, month = aug, year = {2018}, } @article{barr_risk_2007, title = {Risk of cardiovascular and all-cause mortality in individuals with diabetes mellitus, impaired fasting glucose, and impaired glucose tolerance: the {Australian} {Diabetes}, {Obesity}, and {Lifestyle} {Study} ({AusDiab})}, volume = {116}, url = {http://dx.doi.org/10.1161/CIRCULATIONAHA.106.685628}, doi = {10.1161/CIRCULATIONAHA.106.685628}, abstract = {BACKGROUND: Diabetes mellitus increases the risk of cardiovascular disease (CVD) and all-cause mortality. The relationship between milder elevations of blood glucose and mortality is less clear. This study investigated whether impaired fasting glucose and impaired glucose tolerance, as well as diabetes mellitus, increase the risk of all-cause and CVD mortality. METHODS AND RESULTS: In 1999 to 2000, glucose tolerance status was determined in 10,428 participants of the Australian Diabetes, Obesity, and Lifestyle Study (AusDiab). After a median follow-up of 5.2 years, 298 deaths occurred (88 CVD deaths). Compared with those with normal glucose tolerance, the adjusted all-cause mortality hazard ratios (HRs) and 95\% confidence intervals (CIs) for known diabetes mellitus and newly diagnosed diabetes mellitus were 2.3 (1.6 to 3.2) and 1.3 (0.9 to 2.0), respectively. 
The risk of death was also increased in those with impaired fasting glucose (HR 1.6, 95\% CI 1.0 to 2.4) and impaired glucose tolerance (HR 1.5, 95\% CI 1.1 to 2.0). Sixty-five percent of all those who died of CVD had known diabetes mellitus, newly diagnosed diabetes mellitus, impaired fasting glucose, or impaired glucose tolerance at baseline. Known diabetes mellitus (HR 2.6, 95\% CI 1.4 to 4.7) and impaired fasting glucose (HR 2.5, 95\% CI 1.2 to 5.1) were independent predictors for CVD mortality after adjustment for age, sex, and other traditional CVD risk factors, but impaired glucose tolerance was not (HR 1.2, 95\% CI 0.7 to 2.2). CONCLUSIONS: This study emphasizes the strong association between abnormal glucose metabolism and mortality, and it suggests that this condition contributes to a large number of CVD deaths in the general population. CVD prevention may be warranted in people with all categories of abnormal glucose metabolism.}, number = {2}, urldate = {2021-11-11}, journal = {Circulation}, author = {Barr, Elizabeth L M and Zimmet, Paul Z and Welborn, Timothy A and Jolley, Damien and Magliano, Dianna J and Dunstan, David W and Cameron, Adrian J and Dwyer, Terry and Taylor, Hugh R and Tonkin, Andrew M and Wong, Tien Y and McNeil, John and Shaw, Jonathan E}, month = jul, year = {2007}, pages = {151--157}, } @article{knoppers_framework_2014, title = {Framework for responsible sharing of genomic and health-related data}, volume = {8}, url = {http://dx.doi.org/10.1186/s11568-014-0003-1}, doi = {10.1186/s11568-014-0003-1}, number = {1}, urldate = {2021-11-30}, journal = {The HUGO Journal}, author = {Knoppers, Bartha Maria}, month = dec, year = {2014}, pages = {3}, } @article{stephens_big_2015, title = {Big data: astronomical or genomical?}, volume = {13}, issn = {1545-7885}, url = {http://dx.plos.org/10.1371/journal.pbio.1002195}, doi = {10.1371/journal.pbio.1002195}, abstract = {Genomics is a Big Data science and is going to get much bigger, very soon, but 
it is not known whether the needs of genomics will exceed other Big Data domains. Projecting to the year 2025, we compared genomics with three other major generators of Big Data: astronomy, YouTube, and Twitter. Our estimates show that genomics is a "four-headed beast"--it is either on par with or the most demanding of the domains analyzed here in terms of data acquisition, storage, distribution, and analysis. We discuss aspects of new technologies that will need to be developed to rise up and meet the computational challenges that genomics poses for the near future. Now is the time for concerted, community-wide planning for the "genomical" challenges of the next decade.}, number = {7}, urldate = {2015-08-04}, journal = {PLoS Biology}, author = {Stephens, Zachary D and Lee, Skylar Y and Faghri, Faraz and Campbell, Roy H and Zhai, Chengxiang and Efron, Miles J and Iyer, Ravishankar and Schatz, Michael C and Sinha, Saurabh and Robinson, Gene E}, month = jul, year = {2015}, pages = {e1002195}, } @article{bard_ontologies_2004, title = {Ontologies in biology: design, applications and future challenges}, volume = {5}, url = {http://dx.doi.org/10.1038/nrg1295}, doi = {10.1038/nrg1295}, number = {3}, urldate = {2021-11-15}, journal = {Nature Reviews Genetics}, author = {Bard, Jonathan B L and Rhee, Seung Y}, month = mar, year = {2004}, pages = {213--222}, }