{ "255da7ea1": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "\u00d6NB, Cod. 3891. Ground Truth", "url": "https://doi.org/10.5281/zenodo.7467249", "authors": [ { "name": "Ainonen", "surname": "Tuija", "roles": [ "transcriber" ] }, { "name": "Andresen", "surname": "Suse", "roles": [ "transcriber" ] }, { "name": "Bakker", "surname": "Lo\u00efs", "roles": [ "transcriber" ] }, { "name": "Boylan", "surname": "Amy", "roles": [ "transcriber" ] }, { "name": "Della Manna", "surname": "Silvia", "roles": [ "transcriber" ] }, { "name": "Dziemski", "surname": "Wiktor", "orcid": "0000-0001-8166-2249" }, { "name": "Henderson", "surname": "C. E. M.", "orcid": "0000-0002-5040-9926", "roles": [ "transcriber" ] }, { "name": "Impagnatiello", "surname": "Michele", "roles": [ "transcriber" ] }, { "name": "Jenko Kova\u010di\u010d", "surname": "Ana", "orcid": "0000-0001-7243-7082", "roles": [ "transcriber" ] }, { "name": "Komatovi\u0107", "surname": "Stevan", "roles": [ "transcriber" ] }, { "name": "Ku", "surname": "Ruby Wai-Ying", "orcid": "0000-0003-2688-6287", "roles": [ "transcriber" ] }, { "name": "Loss", "surname": "Edward", "orcid": "0000-0002-9837-8321", "roles": [ "transcriber" ] }, { "name": "Mairhofer", "surname": "Daniela", "orcid": "0000-0002-3531-9658", "roles": [ "transcriber", "project-manager" ] }, { "name": "Morcos", "surname": "Erene", "roles": [ "transcriber" ] }, { "name": "Odstr\u010dil\u00edk", "surname": "Jan", "orcid": "0000-0001-9104-9827", "roles": [ "transcriber" ] }, { "name": "Paternic\u00f2", "surname": "Giuseppe", "orcid": "0000-0002-7124-8869", "roles": [ "transcriber" ] }, { "name": "Riparante", "surname": "Marta", "roles": [ "transcriber" ] }, { "name": "Schimdt", "surname": "Nathalie", "roles": [ "transcriber" ] }, { "name": "So\u0142omieniuk", "surname": "Michal", "roles": [ "transcriber" ] }, { "name": "Walczak", "surname": "Tomasz", "roles": [ "transcriber" ] }, { "name": "Zharov", "surname": "Dmitry", "roles": [ "transcriber" 
] } ], "institutions": [], "description": "The Ground Truth was produced by the participants of the HTR Winter School 2022 in the Late Latin Group (more information: https://www.oeaw.ac.at/imafo/veranstaltungen/detail/introduction-into-handwritten-text-recognition).\n\nThe Ground Truth includes the following folios: 1-3r, 6-8, 11r, 27 and is still work in progress. We are adding more pages soon. If you find any errors we kindly ask you to contact Jan Odstr\u010dil\u00edk (jan.odstrcilik@oeaw.ac.at).\n\nThe Supervisors of the Late Latin Group: Jan Odstr\u010dil\u00edk PhD, Austrian Academy of Sciences, Daniela Mairhofer PhD, Princeton University, Tobias Hodel PhD, University of Bern.", "project-name": "HTR Winter School 2022, Vienna", "language": [ "lat" ], "production-software": "Transkribus", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1200", "notAfter": "1299" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Page-XML", "volume": [ { "metric": "lines", "count": 952 } ], "transcription-guidelines": "Regular transcription with expansion of abbreviations. \n- Normalization of J to I \n- V to U in the vowel function, U to V in the consonant function\n- long S to S. 
\n- No correction of misspellings (tagged in the ground truth)\n- No standardization of lower-case and upper-case letters\n- No added interpunction", "automatically-aligned": false, "_pid": "255da7ea1" }, "c326a6fee": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Donn\u00e9es v\u00e9rit\u00e9 de terrain HTR+ Annuaire des propri\u00e9taires et des propri\u00e9t\u00e9s de Paris et du d\u00e9partement de la Seine (1898-1923)", "url": "http://dx.doi.org/10.34847/nkl.acb724xs", "project-name": "Groupe annuaires et adresses - Consortium Huma-num Paris Time Machine\n", "project-website": "https://paris-timemachine.huma-num.fr/groupe-adresses-et-annuaires/", "authors": [ { "name": "Elgarrista", "surname": "Gabriela", "roles": [ "transcriber", "quality-control" ] }, { "name": "M\u00e9lanie-Becquet", "surname": "Fr\u00e9d\u00e9rique", "roles": [ "project-manager", "quality-control" ] }, { "name": "Brando", "surname": "Carmen", "roles": [ "project-manager", "quality-control" ] } ], "description": "Annuaire des propri\u00e9taires et des propri\u00e9t\u00e9s de Paris et du d\u00e9partement de la Seine. Lien dans le catalogue de la BNF : https://catalogue.bnf.fr/ark:/12148/cb32697229h. Cr\u00e9dits : Biblioth\u00e8que nationale de France. Donn\u00e9es v\u00e9rit\u00e9 de terrain r\u00e9sultant de la transcription et la segmentation manuelle d\u2019un \u00e9chantillon de 169 pages des annuaires appartenant aux volumes 1898 et 1923. Un mod\u00e8le de transcription HTR+ a \u00e9t\u00e9 entrain\u00e9 \u00e0 partir de cet \u00e9chantillon gr\u00e2ce \u00e0 Transkribus et est disponible sur cette plateforme en mode public. Ce mod\u00e8le est valable pour transcrire automatiquement les volumes de 1903 et 1913 et tout autre document imprim\u00e9 \u00e0 deux colonnes et en utilisant l'alphabet latin et particuli\u00e8rement en fran\u00e7ais. 
Le choix de l'\u00e9chantillon est fait par crit\u00e8re alphab\u00e9tique car c'est le mode d'organisation de l'information dans ce document. Les accolades pr\u00e9sentes dans le document n'ont pas \u00e9t\u00e9 segment\u00e9es. 118 pages pour entrainer et 51 pages pour validation.\nContexte et financement : Subvention DAHN (Dispositif de soutien \u00e0 l'archivistique et aux humanit\u00e9s num\u00e9riques) par le MESRI. Equipes : Consortium Paris Time Machine - TGIR Humanum EHESS / CNRS / LATTICE / INRIA Contact si besoin d'anonymiser les noms de personnes : carmen.brando@ehess.fr.\n", "language": [ "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1898", "notAfter": "1923" }, "hands": { "count": "less-than-11", "precision": "estimated" }, "license": [ { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" } ], "format": "Alto-XML", "volume": [ { "count": 169, "metric": "pages" }, { "count": 19022, "metric": "lines" }, { "count": 641401, "metric": "characters" } ], "transcription-guidelines": "Transcription diplomatique. 
Les accolades n'ont pas \u00e9t\u00e9 segment\u00e9es.\n", "production-software": "Transkribus", "automatically-aligned": false, "_bibtex": "@misc{https://doi.org/10.34847/nkl.acb724xs,\n doi = {10.34847/NKL.ACB724XS},\n url = {https://nakala.fr/10.34847/nkl.acb724xs},\n author = {Brando, Carmen and Elgarrista, Gabriela and M\u00e9lanie-Becquet, Fr\u00e9d\u00e9rique},\n keywords = {Paris, Historical source material, HTR, Transcripci\u00f3n, Apprentissage (intelligence artificielle)},\n language = {fr},\n title = {Donn\u00e9es v\u00e9rit\u00e9 de terrain HTR+ Annuaire des propri\u00e9taires et des propri\u00e9t\u00e9s de Paris et du d\u00e9partement de la Seine (1898-1923)},\n publisher = {NAKALA - https://nakala.fr (Huma-Num - CNRS)},\n year = {2021}\n}\n", "_pid": "c326a6fee" }, "7a99090c5": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "University of Denver Jewish Consumptives Relief Society Medical Records Training and Validation Set", "url": "http://dx.doi.org/10.5281/zenodo.4243023", "authors": [ { "name": "Pham", "surname": "Kim", "orcid": "0000-0002-9115-4739", "roles": [ "project-manager" ] } ], "institutions": [], "description": "Training and validation set. 
Transcribed records available upon request.\nThe transcribed corpus of records from the Jewish Consumptive Relief Society contains data that include individually identifiable health information, among other sensitive information regarding persons and people.\n\nAll individuals for whom records are provided have been deceased for at least 70 years, but were they still living today, these records would be recognized as being protected health information under the US Health Insurance Portability and Accountability Act of 1996 (HIPAA).\n\nWhile HIPAA and other privacy laws no longer apply to these individuals, in providing these data the University of Denver wishes to foster research practices that express the utmost respect for the human beings whose lives are represented, at least in some part, in these collections. In addition, we ask researchers respect the lives of these individuals\u2019 ancestors and their communities.\n\nTo foster practices that honor patients, staff, nurses and physicians connected with the JCRS Sanitorium, as well as their families, ancestors and communities, we ask that researchers disclose their intended use of the collection for review by our Advisory Board (see reverse). This Board is comprised of ethicists, historians, librarians, attorneys, physicians, and members of the Jewish community.\n\nIn addition, we ask researchers agree to conduct their work under the following set of principles:\n\n1. I affirm the role of JCRS patients and staff as data creators and will avoid exploiting and/or dehumanizing them by treating them simply as data.\n2. My research will, when possible and appropriate, account for the contexts surrounding the JCRS subjects as data arise. My work will recognize that all data and datasets are shaped by decisions about how histories are recorded, remembered, and valued.\n3. 
If the nature of my work is such that I am sharing the life stories and/or narratives of individuals in these data, and I can do so with no potential harm to their reputation or that of their ancestors, I will honor them by naming them. If the nature of my work is such that I am exploring large-scale patterns in the dataset, and naming individuals serves no specific research purpose, I will anonymize and/or redact names within the data. \n4. If I am publishing the results of research conducted with these data, I will, if possible and appropriate, include a note of recognition and/or gratitude in my publication. We suggest a version of: \u201cThis work was made possible in part by the patients, staff, nurses, physicians, and community of the Jewish Consumptive Relief Society (JCRS). The people who lived, worked, and died at the JCRS sought to relieve human suffering. I am grateful to them.\u201d", "project-name": "Collections as Data - University of Denver Transcribing Handwritten Medical Records", "project-website": "https://du-collections-as-data.netlify.app/", "language": [ "eng" ], "production-software": "Transkribus", "script": [ { "iso": "Latn" } ], "script-type": "mainly-manuscript", "time": { "notBefore": "1900", "notAfter": "1950" }, "hands": { "count": "unknown", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Page-XML", "volume": [ { "metric": "lines", "count": 36027 }, { "metric": "characters", "count": 3494619 }, { "metric": "files", "count": 2660 }, { "metric": "regions", "count": 4254 } ], "automatically-aligned": false, "_bibtex": "@misc{https://doi.org/10.5281/zenodo.4243023,\n doi = {10.5281/ZENODO.4243023},\n url = {https://zenodo.org/record/4243023},\n author = {Pham, Kim},\n title = {University of Denver Collections as Data - HTR Train and Validation Set JCRS_2020_5_27},\n publisher = {Zenodo},\n year = {2020},\n copyright = {Creative Commons Attribution 4.0 
International}\n}\n", "_pid": "7a99090c5" }, "0be31b50c": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Ground truth data for printed Devanagari", "url": "https://doi.org/10.11588/data/EGOKEI", "authors": [ { "name": "Nicole", "surname": "Merkel-Hilf", "orcid": "0000-0002-0344-6169", "roles": [ "transcriber", "project-manager" ] }, { "name": "Daria", "surname": "Peshcherova", "roles": [ "support" ] } ], "institutions": [ { "name": "Heidelberg University Library" } ], "description": "Ground truth (GT) data (jpg and alto xml files) for an OCR model that recognizes printed text in Devanagari script.\n\nThe GT data was trained on Transkribus with the HTR+ engine. The training was performed on appr. 220 pages with appr. 27,000 words. The validation set was 10% of the training set.\n\nThe training material is comprised of letterpress printings from the Naval Kishore Press (Lakhnau, North India) from the late 19th and early 20th century in the Hindi, Sanskrit, Braj Bhasha and Awadhi languages.\n\nTranscription was performed by Nicole Merkel-Hilf (CATS Library / Heidelberg University Library) with support by Daria Peshcherova (CATS Library / Heidelberg University Library).", "project-name": "Naval Kishore Press - digital", "project-website": "https://digi.ub.uni-heidelberg.de/en/sammlungen/suedasien/navalkishore.html", "language": [ "hin", "san", "bra" ], "production-software": "Transkribus", "script": [ { "iso": "Deva" } ], "script-type": "only-typed", "time": { "notBefore": "1880", "notAfter": "1953" }, "hands": { "count": "less-than-11", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "lines", "count": 4333 } ], "transcription-guidelines": "Diplomatic transcription, no correction of misspellings", "automatically-aligned": false, "_bibtex": "@misc{https://doi.org/10.11588/data/egokei,\n doi = {10.11588/DATA/EGOKEI},\n 
url = {https://heidata.uni-heidelberg.de/citation?persistentId=doi:10.11588/data/EGOKEI},\n author = {Merkel-Hilf, Nicole},\n title = {Ground Truth data for printed Devanagari},\n publisher = {heiDATA},\n year = {2022}\n}\n", "_pid": "0be31b50c" }, "669f8cd0d": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Ground Truth data for printed Malayalam", "url": "https://doi.org/10.11588/data/L2KRZO", "authors": [], "institutions": [ { "name": "T\u00fcbingen University Library", "roles": [ "project-manager" ] } ], "description": "Ground Truth (GT) data (JPG and ALTO XML files) which can be used to train OCR models that recognize printed text in Malayalam script. The training material is gathered from 19th- and 20th-century prints.\n\nThe GT data was trained in Transkribus with the HTR+ and the PyLaia engine with a resulting CER of 2.29% on validation set with HTR+ and 3.20% with PyLaia. The training was performed on 43 pages with appr. 9,000 words. The validation set consisted of 5 pages (ca. 1,000 words).\n\nTranscription was performed by T\u00fcbingen University Library, the Ground Truth data was created by Elena Mucciarelli (University of Groningen) with support and model training by Dorothee Huff (T\u00fcbingen University Library). 
(2022-11-02)", "project-name": "DigitalSouthAsia", "project-website": "http://idb.ub.uni-tuebingen.de/digitue/southasia", "language": [ "mal" ], "production-software": "Transkribus", "script": [ { "iso": "Mlym" } ], "script-type": "only-typed", "time": { "notBefore": "1850", "notAfter": "1996" }, "hands": { "count": "unknown", "precision": "exact" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Page-XML", "volume": [ { "metric": "pages", "count": 43 } ], "_bibtex": "@misc{https://doi.org/10.11588/data/l2krzo,\n doi = {10.11588/DATA/L2KRZO},\n url = {https://heidata.uni-heidelberg.de/citation?persistentId=doi:10.11588/data/L2KRZO},\n author = {{T\u00fcbingen University Library}},\n title = {Ground Truth data for printed Malayalam},\n publisher = {heiDATA},\n year = {2023}\n}\n", "_pid": "669f8cd0d" }, "0a089ab6d": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Incunabula Reichenau", "url": "https://doi.org/10.5281/zenodo.11046061", "authors": [ { "name": "Annika", "surname": "Stello", "orcid": "0000-0002-6305-4810", "roles": [ "project-manager" ] }, { "name": "Gerit", "surname": "Heim", "orcid": "0000-0002-5820-7771", "roles": [ "project-manager" ] }, { "name": "Katharina", "surname": "Ost", "orcid": "0000-0002-6234-9721", "roles": [ "transcriber" ] } ], "institutions": [], "description": "This data set contains the training data for the following three published Transkribus models:\nGerman Incunabula (Reichenau), Latin Incunabula (Reichenau), Latin/German Bilingual Incunabula (Reichenau).\nThis data set represents an excerpt of a collection of incunabula and post-incunabula of the former Reichenau monastery, now held at the Badische Landesbibliothek in Karlsruhe (see https://digital.blb-karlsruhe.de/topic/view/7530707). 
As, typically, 1-20 pages were drawn from single prints, it reflects a wide range of typefaces used by early printers from the German language area and Northern Italy.\nThe data was created as part of the project Digitalisierung und Volltexterkennung der ehemals Reichenauer Inkunabeln at the Badische Landesbibliothek, which was funded by the Stiftung Kulturgut Baden-W\u00fcrttemberg.", "project-name": "Digitalisierung und Volltexterkennung der ehemals Reichenauer Inkunabeln", "language": [ "lat", "deu" ], "production-software": "Transkribus", "automatically-aligned": false, "script": [ { "iso": "Latn" }, { "iso": "Goth" } ], "script-type": "only-typed", "time": { "notBefore": "1470", "notAfter": "1510" }, "hands": { "count": "more-than-10", "precision": "exact" }, "license": { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" }, "format": "Page-XML", "volume": [ { "metric": "pages", "count": 2200 } ], "transcription-guidelines": "Abbreviations are represented through special characters, please see the project repository for a full documentation.", "_bibtex": "@misc{https://doi.org/10.5281/zenodo.11046061,\n doi = {10.5281/ZENODO.11046061},\n url = {https://zenodo.org/doi/10.5281/zenodo.11046061},\n author = {{Badische Landesbibliothek} and Ost, Katharina and Stello, Annika and Heim, Gerrit},\n language = {de},\n title = {Training Data Incunabula Reichenau},\n publisher = {Zenodo},\n year = {2024},\n copyright = {Creative Commons Attribution Share Alike 4.0 International}\n}\n", "_pid": "0a089ab6d" }, "7dcc35e88": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Handwritten Text Recognition Ground Truth Set: StABS Ratsb\u00fccher O10, Urfehdenbuch X", "url": "https://doi.org/10.5281/zenodo.5153263", "authors": [ { "name": "Susanna", "surname": "Burghartz", "roles": [ "project-manager" ] }, { "name": "Calvi", "surname": "Sonia", "roles": [ "project-manager", "quality-control" ] }, { "name": "Vogeler", 
"surname": "Georg", "roles": [ "project-manager" ] }, { "name": "Baur", "surname": "Laila", "roles": [ "transcriber" ] }, { "name": "Egli", "surname": "Benedikt", "roles": [ "transcriber" ] }, { "name": "Gehrig", "surname": "Gabriela", "roles": [ "transcriber" ] }, { "name": "Heini", "surname": "Alexandra Isabelle", "roles": [ "transcriber" ] }, { "name": "Rossi", "surname": "Rosanna", "roles": [ "transcriber" ] }, { "name": "Siegrist", "surname": "Benjamin", "roles": [ "transcriber" ] }, { "name": "Wasmer", "surname": "Remo", "roles": [ "transcriber" ] }, { "name": "Zimmermann", "surname": "Lynn", "roles": [ "transcriber" ] }, { "name": "Schoch", "surname": "David", "roles": [ "aligner" ] }, { "name": "D\u00e4ngeli", "surname": "Peter", "roles": [ "digitization" ] }, { "name": "Hodel", "surname": "Tobias", "roles": [ "project-manager", "aligner" ] } ], "description": "Ground Truth for \"Urfehdenbuch X der Stadt Basel (1563-1569)\" at Staatsarchiv Basel-Stadt (StABS).", "project-website": "hdl:11471/1010.2.1", "language": [ "deu" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1563", "notAfter": "1569" }, "hands": { "count": "unknown", "precision": "estimated" }, "license": [ { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" } ], "format": "Page-XML", "volume": [ { "metric": "lines", "count": 8000 } ], "transcription-guidelines": "See: http://gams.uni-graz.at/o:ufbas.1563", "production-software": "Transkribus", "automatically-aligned": false, "_bibtex": "@misc{https://doi.org/10.5281/zenodo.5153263,\n doi = {10.5281/ZENODO.5153263},\n url = {https://zenodo.org/record/5153263},\n author = {Hodel, Tobias and Schoch, David and D\u00e4ngeli, Peter},\n keywords = {Handwritten Text Recognition, Ground Truth, Early Modern German Kurrent},\n language = {de},\n title = {Handwritten Text Recognition Ground Truth Set: StABS Ratsb\u00fccher O10, Urfehdenbuch X},\n publisher = {Zenodo},\n year = 
{2021},\n copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International}\n}\n", "_pid": "7dcc35e88" }, "782b1e7da": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Charters and Records of K\u00f6nigsfelden Abbey and Bailiwick (1308-1662)", "url": "https://doi.org/10.5281/zenodo.5179361", "authors": [ { "name": "Hodel", "surname": "Tobias", "roles": [ "transcriber", "project-manager", "support" ] }, { "name": "Halter-Pernet", "surname": "Colette", "roles": [ "transcriber", "aligner", "project-manager", "quality-control", "digitization", "support" ] }, { "name": "Teuscher", "surname": "Simon", "roles": [ "project-manager" ] } ], "description": "The data set is the publication of the data of the scholarly edition \"Urkunden und Akten des Klosters und der Hofmeisterei K\u00f6nigsfelden\".", "project-website": "https://www.koenigsfelden.uzh.ch/", "language": [ "lat", "deu" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1292", "notAfter": "1570" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Page-XML", "volume": [ { "metric": "lines", "count": 60000 } ], "transcription-guidelines": "See: https://www.koenigsfelden.uzh.ch/exist/apps/ssrq/intro.html#richtlinien", "production-software": "Transkribus", "automatically-aligned": false, "_bibtex": "@misc{https://doi.org/10.5281/zenodo.5179361,\n doi = {10.5281/ZENODO.5179361},\n url = {https://zenodo.org/record/5179361},\n author = {Halter-Pernet, Colette and Teuscher, Simon and Hodel, Tobias and Barwitzki, Lukas and Egloff, Salome and Henggeler, Fabian and Nadig, Michael and Steinmann, Anina and Stettler, Sabine and Prada Ziegler, Ismail},\n keywords = {Scholarly Edition, Monastery, K\u00f6nigsfelden Abbey, Poor Clares, Franciscan Friars, Hapsburg, Handwritten Text Recognition},\n title = {Charters 
and Records of K\u00f6nigsfelden Abbey and Bailiwick (1308-1662)},\n publisher = {Zenodo},\n year = {2021},\n copyright = {Creative Commons Attribution 4.0 International}\n}\n", "_pid": "782b1e7da" }, "3989ce92b": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "GT and HTR of VOC (Dutch East-Asia Company), WIC (Dutch West-Asia Company) and notarial deeds.", "url": "https://doi.org/10.5281/zenodo.6414086", "authors": [ { "name": "Keijser", "surname": "Liesbeth", "roles": [ "transcriber", "project-manager" ] }, { "name": "Noppe", "surname": "Vincent" } ], "institutions": [ { "name": "National Archive Netherlands / Nationaal Archief", "roles": [ "digitization", "support" ] } ], "description": "6000 ground truth of VOC and notarial deeds and 3.000.000 HTR of VOC, WIC and notarial deeds\nThe National Archives of the Netherlands and Noord-Hollands Archief conducted a project using the Transkribus HTR (Handwritten Text Recognition) platform. The aim was to semi automatically transcribe 2 million pages of old Dutch texts.\n\nThe transcribed archives are 17th and 18th century documents from the Dutch East-Asia Company (VOC). And 19th century notarial deeds from Noord-Hollands Archief and other archives in the provinces.\n\nIn order to train the HTR software a team produced transcriptions of approximately 6000 scans. The scans are randomly selected from the dataset and contain hundreds of hands. With these transcriptions a model is trained that can recognize more than 90% of the characters correctly. Transkribus transcribed the 2 million scans automatically using the trained model.\n\nLater on, 1 million extra scans concerning the West India Company (WIC) were transcribed automatically without adding extra ground truth or training. These archives are from the 17th and 18th century.\n\nThe datasets published in Zenodo contain the ground truth (scans in JPG, transcription in PAGE XML) and the HTR results (in PAGE XML and TXT). 
See the overview on the Zenodo page.\n\nA specification on which archives have been transcribed (both GT and HTR) can be found on the Zenodo.\n\nFor open data access of scans and inventories of the National Archives click here: https://www.nationaalarchief.nl/onderzoeken/open-data/archiefinventarissen-digitale-objecten-en-scans-van-archieven \nDisclaimer: due to a variety of languages used and the bad state of the documents the HTR results of \"1.05.21, Dutch series Guyana\" can be of poor quality.", "project-name": "De ijsberg zichtbaar maken", "project-website": "https://www.nationaalarchief.nl/beleven/nieuws/kijk-symposium-de-ijsberg-zichtbaar-maken-terug#:~:text=In%20het%20project%20De%20IJsberg,de%20website%20zoekintranscripties.nl%20ontwikkeld.", "language": [ "nld" ], "production-software": "Transkribus", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1600", "notAfter": "1899" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Page-XML", "volume": [ { "metric": "pages", "count": 6000 }, { "count": 251889, "metric": "lines" }, { "count": 6350, "metric": "files" }, { "count": 10735, "metric": "regions" }, { "count": 24432166, "metric": "characters" } ], "automatically-aligned": false, "_bibtex": "@misc{https://doi.org/10.5281/zenodo.6414086,\n doi = {10.5281/ZENODO.6414086},\n url = {https://zenodo.org/doi/10.5281/zenodo.6414086},\n author = {Liesbeth Keijser, },\n keywords = {Transciptions, Verenigde Oost-Indische Compagnie, West-Indische Compagnie, Notarial deeds, Nationaal Archief, Noord-Hollands Archief, Transkribus},\n language = {odt},\n title = {6000 ground truth of VOC and notarial deeds 3.000.000 HTR of VOC, WIC and notarial deeds},\n publisher = {Zenodo},\n year = {2020},\n copyright = {Creative Commons Attribution 4.0 International}\n}\n", "_pid": "3989ce92b" }, "58807c215": { "schema": 
"https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Dataset for late medieval Castilian text recognition ", "url": "https://doi.org/10.5281/zenodo.7386489", "authors": [ { "name": "Gille Levenson", "surname": "Matthias", "orcid": "0000-0001-9488-5986", "roles": [ "transcriber", "quality-control" ] } ], "institutions": [], "description": "HTR/OCR open access gold corpus for spanish late medieval sources, based\non the allographetic transcription of more than 300 pages of several manuscripts of the Regimiento de los Pr\u00edn\u00e7ipes, as well as a first set of general transcription models trained with kraken and out-of-domain test data. See https://doi.org/10.5281/zenodo.7387376 for full description of the dataset.", "language": [ "spa" ], "production-software": "eScriptorium + Kraken", "script": [ { "iso": "Latn" } ], "script-type": "mainly-manuscript", "time": { "notBefore": "1300", "notAfter": "1500" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": [ { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "lines", "count": 28000 } ], "transcription-guidelines": "Allographetic transcription. See the article (https://doi.org/10.5281/zenodo.7387376) for full transcription guidelines.\n320 pages in-domain; 40 pages out-of-domain", "automatically-aligned": false, "_bibtex": "@misc{https://doi.org/10.5281/zenodo.7386489,\n doi = {10.5281/ZENODO.7386489},\n url = {https://zenodo.org/doi/10.5281/zenodo.7386489},\n author = {Matthias Gille Levenson, },\n keywords = {OCR, HTR, dataset, allographetic, medieval castilian},\n language = {en},\n title = {Towards a general open dataset and model for late medieval Castilian text recognition (HTR/OCR). 
Datasets and scripts},\n publisher = {Zenodo},\n year = {2023},\n copyright = {Creative Commons Attribution Non Commercial Share Alike 4.0 International}\n}\n", "_pid": "58807c215" }, "939d02cb9": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Klosterneuburg, Stiftsbibl., Cod. 48 - Ground Truth: Initial Release", "url": "https://doi.org/10.5281/zenodo.7466927", "authors": [ { "name": "Berger", "surname": "Michael", "orcid": "0000-0002-6627-5272" }, { "name": "Bolte", "surname": "Henrike" }, { "name": "F\u00fchrer", "surname": "Veronika", "orcid": "0000-0003-3145-4083" }, { "name": "Hausleitner", "surname": "Felix", "orcid": "0000-0002-9788-8127" }, { "name": "Hutterer", "surname": "Sarah" }, { "name": "L\u00fcthi", "surname": "Tim", "orcid": "0000-0003-1925-7175" }, { "name": "Nancu", "surname": "Mihaela" }, { "name": "Passoni", "surname": "Erica" }, { "name": "Pataki", "surname": "Katalin", "orcid": "0000-0003-0331-8295" }, { "name": "Schr\u00f6cksnadel", "surname": "Sophie" }, { "name": "Verri", "surname": "Giovanni", "orcid": "0000-0002-1297-2152" }, { "name": "Wegener", "surname": "Dennis", "orcid": "0000-0002-9410-9191" } ], "institutions": [], "description": "This is ground truth for the vast collection of sermons of Nikolaus von Dinkelsb\u00fchl (ca. 1360 to 17th March 1433), translated and reorganised by a German redactor. This 15th-century collection has never been edited until now. It consists of 361 folios of parchment and paper. The text speaks about various topics such as fasting and other religious practices. Being one of the leading intellectuals of his time, Nikolaus von Dinkelsb\u00fchl also contributed to the development of the University of Vienna. The manuscript was probably produced in the vicinity of Klosterneuburg in Austria and is still kept there today (Shelfmark: Cod. 
48).\n\nData collection and ground truth creation:\n\nThe edition at hand was produced by an international team of researchers from various fields in the context of the Vienna HTR Winter School 2022 with the help of Transkribus Expert Client.\n\nWe uploaded the images of the manuscript into the Transkribus platform, applied the line recognition tool and manually copied the transcribed text lines into the recognised line boxes. Various models were trained with the ground truth (20% of the entire codex) created by the team.\n\nImages of the Klosterneuburg, Augustiner-Chorherrenstift, Cod. 48 are available at: https://manuscripta.at/diglit/AT5000-48/0001", "project-name": "HTR Winter School 2022, Vienna", "language": [ "gmh" ], "production-software": "Transkribus", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1440", "notAfter": "1449" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "pages", "count": 68 }, { "metric": "lines", "count": 4605 } ], "automatically-aligned": false, "_bibtex": "@misc{https://doi.org/10.5281/zenodo.7466927,\n doi = {10.5281/ZENODO.7466927},\n url = {https://zenodo.org/record/7466927},\n author = {Berger, Michael and Bolte, Henrike and F\u00fchrer, Veronika and Hausleitner, Felix and Hutterer, Sarah and L\u00fcthi, Tim and Nancu, Mihaela and Passoni, Erica and Pataki, Katalin and Schr\u00f6cksnadel, Sophie and Verri, Giovanni and Wegener, Dennis and Hofert, Sandra},\n keywords = {Digital Humanities, Handwritten Text Recognition, German, Nikolaus-von-Dinkelsb\u00fchl-Redaktor},\n title = {Klosterneuburg, Stiftsbibl., Cod. 
48 - Ground Truth: Initial Release},\n publisher = {Zenodo},\n year = {2022},\n copyright = {Creative Commons Attribution 4.0 International}\n}\n", "_pid": "939d02cb9" }, "69f2aaf10": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "GT4HistCommentLayout: Layout Ground Truth for Historical Commentaries", "url": "https://github.com/AjaxMultiCommentary/GT-commentaries-OLR", "authors": [ { "name": "Matteo", "surname": "Romanello", "orcid": "0000-0002-7406-6286", "roles": [ "project-manager" ] }, { "name": "Sven", "surname": "Najem-Meyer", "orcid": "0000-0002-3661-4579", "roles": [ "transcriber", "quality-control" ] }, { "name": "Carla", "surname": "Amaya", "roles": [ "transcriber" ] } ], "description": "This dataset contains layout annotations for ca. 370 pages sampled from 8 public domain classical commentaries, published in the 19th century in English, German and Latin. The commentaries concern Ancient Greek and Latin works from prose and poetry (caveat: AGreek poetry is slightly over-represented). Pages were annotated according to a taxonomy mapped to the SegmOnto controlled vocabulary.", "project-name": "Ajax Multi-Commentary", "project-website": "https://mromanello.github.io/ajax-multi-commentary/", "language": [ "eng", "deu", "lat", "grc" ], "production-software": "Kraken + VGG Image Annotator (VIA)", "script": [ { "iso": "Latn" }, { "iso": "Grek" } ], "script-type": "only-typed", "time": { "notBefore": "1835", "notAfter": "1903" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 0 }, { "metric": "files", "count": 371 }, { "metric": "lines", "count": 0 }, { "metric": "regions", "count": 2386 } ], "transcription-guidelines": "SegmOnto guidelines (v. 
0.9)", "citation-file-link": "https://github.com/AjaxMultiCommentary/GT-commentaries-layout/blob/master/CITATION.cff", "characters": { "mode": "NFD", "members": [] }, "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Matteo and Najem-Meyer, Sven and Amaya, Carla},\ndoi = {10.5281/zenodo.7271729},\ntitle = {GT4HistCommentLayout: Layout Ground Truth for Historical Commentaries}\n}\n", "_apa": "Matteo, Najem-Meyer S., Amaya C. GT4HistCommentLayout: Layout Ground Truth for Historical Commentaries (version 1.0). DOI: 10.5281/zenodo.7271729\n", "_pid": "69f2aaf10" }, "dc7677d2b": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Fabliaux", "url": "https://github.com/CIHAM-HTR/Fabliaux", "authors": [ { "name": "Corinne", "surname": "Pierreville", "orcid": "0009-0003-3074-3841", "roles": [ "project-manager" ] }, { "name": "Ariane", "surname": "Pinche", "orcid": "0000-0002-7843-5050", "roles": [ "transcriber", "aligner", "quality-control" ] } ], "institutions": [], "description": "HTR data sets from medieval manuscripts (13th-14th c.) collecting \"fabliaux\" funded by Biblissima+", "project-website": "https://projet.biblissima.fr/fr/appels-projets/projets-retenus/fabliaux", "language": [ "fro" ], "production-software": "eScriptorium + Kraken", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1200", "notAfter": "1402" }, "hands": { "count": "1-per-folder", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "citation-file-link": "https://github.com/CIHAM-HTR/Fabliaux/blob/master/CITATION.cff", "transcription-guidelines": "The data follow the standards recommended by the CREMMALAB project, see Ariane Pinche. Transcription Guide for 10th to 15th Century Manuscripts. 2022. 
\u27e8hal-03697382\u27e9", "volume": [ { "metric": "characters", "count": 44963 }, { "metric": "files", "count": 25 }, { "metric": "lines", "count": 2070 }, { "metric": "regions", "count": 94 } ], "characters": { "mode": "NFD", "members": [ "e", "i", "s", "a", "t", "u", "o", "n", "r", "l", "m", "c", "d", "\u0303", "p", "f", "h", "b", "\u204a", "g", ".", "q", "z", "\u033e", "Q", "\ua751", "S", "x", "I", "L", "D", "C", "\u0365", "E", "A", "\ua770", "T", "k", "\ua76f", "M", "N", "O", "P", "U", "\u0363", "y", "F", "9", "\ua758", "B", "G", "J", "1", "/", "\u1e9c", "\u0142", "\u27e6", "\u27e7", "\u1dd1", "R", "7", "H", "'", "\u0364", "w", ":", "4", "0", "6", "8", "5", "K", "\uf1ac", "\u0366", "v", "\u036b", "V", "\u1de4", "\u205c", "3", "\u0111", "X", "\u2038", "\u1de0", "2", "\ua753" ] }, "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Pinche, Ariane and Pierreville, Corinne},\nmonth = {4},\ntitle = {Fabliaux},\nurl = {https://github.com/CIHAM-HTR/Fabliaux/data},\nyear = {2023}\n}\n", "_apa": "Pinche A., Pierreville C. (2023). 
Fabliaux URL: https://github.com/CIHAM-HTR/Fabliaux/data\n", "_pid": "dc7677d2b" }, "4b17f1293": { "authors": [ { "name": "Davide", "roles": [ "transcriber", "aligner" ], "surname": "Aruta" }, { "name": "Martina", "roles": [ "transcriber", "aligner" ], "surname": "Lenzi" }, { "name": "Armelle", "orcid": "0000-0001-7938-2686", "roles": [ "transcriber", "aligner" ], "surname": "Le Hu\u00ebrou" }, { "name": "Maryl\u00e8ne", "orcid": "0000-0002-9250-370X", "roles": [ "project-manager" ], "surname": "Possama\u00ef" }, { "name": "Ariane", "orcid": "0000-0002-7843-5050", "roles": [ "quality-control" ], "surname": "Pinche" } ], "characters": { "members": [ "e", "i", "u", "s", "a", "t", "n", "r", "o", "l", "c", "m", "d", "p", ".", "q", "\u0303", "g", "b", "f", "z", "h", "y", "x", "-", "\u0365", "\u0363", "\u204a", "E", "\u00b6", "\u033e", "\ua759", "C", "\ua770", "\u0366", "\ua751", "S", "\ua753", "Q", "H", "\ua76f", "I", "M", "\u036d", "2", "L", "\u036b", "D", "\ua775", "T", "\u0368", "A", "\u0142", "\u036c", "\u0364", "\u1dd1", "N", "O", "U", "P", "R", "\u0127", ":", "F", "\ua76d", "7", "\u1d48", "\uf1ac", "3", "\u27e6", "\u27e7", "Y", "\u0367", "\u0111", "G", "1", "9", "B", ",", "\ua758" ], "mode": "NFD" }, "citation-file-link": "https://github.com/CIHAM-HTR/Liber/blob/main/CITATION.cff", "description": "HTR datasets of medieval manuscripts (14th-15th c.) 
with Pierre Bersuire\u2019s translation into Old French of the work of Titus Livius and Nicolas Trevet Commentaries", "format": "Alto-XML", "hands": { "count": "1", "precision": "estimated" }, "institutions": [], "language": [ "fro", "lat" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-website": "https://anr.fr/Projet-ANR-21-CE27-0008", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "sources": [ { "link": "https://github.com/CIHAM-HTR/Liber", "reference": "Aruta, D., Lenzi, M., Le Hu\u00ebrou, A., Possama\u00ef, M., & Pinche, A. (2023). Liber [Data set]. https://github.com/CIHAM-HTR/Liber/data" } ], "time": { "notAfter": "1400", "notBefore": "1300" }, "title": "Liber", "transcription-guidelines": "Data follow the standards recommended by the CREMMA projects, see Ariane Pinche. Transcription Guide for 10th to 15th Century Manuscripts. 2022. hal-03697382 - and Thibault Cl\u00e9rice, Malamatenia Vlachou-Efstathiou, Alix Chagu\u00e9. CREMMA Medii Aevi: Literary manuscript text recognition in Latin. Journal of Open Humanities Data, 2023, 9, pp.4. \u27e810.5334/johd.97\u27e9. \u27e8hal-03828353v5\u27e9", "url": "https://github.com/CIHAM-HTR/Liber", "volume": [ { "count": 134899, "metric": "characters" }, { "count": 37, "metric": "files" }, { "count": 3789, "metric": "lines" }, { "count": 152, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Aruta, Davide and Lenzi, Martina and Le Hu\u00ebrou, Armelle and Possama\u00ef, Maryl\u00e8ne and Pinche, Ariane},\nmonth = {4},\ntitle = {Liber},\nurl = {https://github.com/CIHAM-HTR/Liber/data},\nyear = {2023}\n}\n", "_apa": "Aruta D., Lenzi M., Le Hu\u00ebrou A., Possama\u00ef M., Pinche A. (2023). 
Liber URL: https://github.com/CIHAM-HTR/Liber/data\n", "_pid": "4b17f1293" }, "c6e6eefe0": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "FoNDUE Spanish chapbooks 19th c. Dataset", "url": "https://github.com/DesenrollandoElCordel/FoNDUE-Spanish-chapbooks-Dataset", "authors": [ { "name": "Carta", "surname": "Constance", "roles": [ "transcriber", "project-manager" ] }, { "name": "Leblanc", "surname": "\u00c9lina", "roles": [ "digitization" ] }, { "name": "Jacsont", "surname": "Pauline", "roles": [ "digitization" ] }, { "name": "Palacios", "surname": "Belinda", "roles": [ "transcriber", "quality-control" ] }, { "name": "Bermudez", "surname": "Luana", "roles": [ "transcriber", "quality-control" ] } ], "description": "Digital editions of the second part of the Genevan Spanish chapbooks collection (19th c.).", "project-name": "Desenrollando El Cordel", "project-website": "https://github.com/DesenrollandoElCordel", "language": [ "cat", "spa", "lat" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1770", "notAfter": "1920" }, "hands": { "count": "more-than-10", "precision": "exact" }, "license": [ { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" } ], "format": "Alto-XML", "sources": [ { "reference": "", "link": "https://unige.swisscovery.slsp.ch/permalink/41SLSP_UGE/btt5ev/alma991008229029705502" }, { "reference": "", "link": "https://unige.swisscovery.slsp.ch/permalink/41SLSP_UGE/kjkm12/alma991002834309705502" } ], "volume": [ { "metric": "characters", "count": 270718 }, { "metric": "lines", "count": 12526 }, { "metric": "pages", "count": 198 } ], "citation-file-link": "https://github.com/DesenrollandoElCordel/FoNDUE-Spanish-chapbooks-Dataset/blob/main/Grountruth/CITATION.cff", "transcription-guidelines": "Les r\u00e8gles de transcription suivante ont \u00e9t\u00e9 adopt\u00e9es :\n- Respecter les accents ;\n- Respecter la casse ;\n- Respecter la ponctuation ;\n- 
Respecter les espaces ;\n- Respecter les retours \u00e0 la ligne ;\n- Respecter la graphie des mots (ne pas corriger les erreurs s\u2019il y en a) ;\n- Supprimer le bruit (t\u00e2ches qui ont \u00e9t\u00e9 prises pour du texte par l\u2019OCR).", "production-software": "eScriptorium + Kraken", "automatically-aligned": false, "_pid": "c6e6eefe0" }, "8239384d8": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "EHRI Dataset", "url": "https://github.com/FloChiff/ehri-dataset", "project-name": "European Holocaust Research Infrastructure\n", "project-website": "https://www.ehri-project.eu/", "authors": [ { "name": "Floriane", "surname": "Chiffoleau", "roles": [ "transcriber" ] }, { "name": "Sarah", "surname": "Beniere", "roles": [ "transcriber" ] }, { "name": "Michal", "surname": "Frankl", "roles": [ "transcriber" ] }, { "name": "Wolfgang", "surname": "Schellenbacher", "roles": [ "transcriber" ] }, { "name": "Zolt\u00e1n", "surname": "V\u00e1gi", "roles": [ "transcriber" ] }, { "name": "G\u00e1bor", "surname": "K\u00e1d\u00e1r", "roles": [ "transcriber" ] }, { "name": "Magdalena", "surname": "Sedlick\u00e1", "roles": [ "transcriber" ] }, { "name": "Miriam", "surname": "Schulz", "roles": [ "transcriber" ] }, { "name": "Christine", "surname": "Schmidt", "roles": [ "transcriber" ] }, { "name": "Jessica", "surname": "Green", "roles": [ "transcriber" ] }, { "name": "Martina", "surname": "Ravagnan", "roles": [ "transcriber" ] }, { "name": "Daniela", "surname": "Bart\u00e1kov\u00e1", "roles": [ "transcriber" ] }, { "name": "Judith", "surname": "Levin", "roles": [ "transcriber" ] }, { "name": "Daphna", "surname": "Sehayek", "roles": [ "transcriber" ] }, { "name": "Micha\u0142", "surname": "Czajka", "roles": [ "transcriber" ] }, { "name": "Marta", "surname": "Wojas", "roles": [ "transcriber" ] }, { "name": "Dagmara", "surname": "Che\u0142stowska", "roles": [ "transcriber" ] }, { "name": "Winfried", "surname": "Garscha", "roles": [ "transcriber" ] 
}, { "name": "Claudia", "surname": "Kuretsidis-Haider", "roles": [ "transcriber" ] } ], "description": "Multilingual dataset from various corpus of the EHRI project \n", "language": [ "eng", "ces", "deu", "slk", "hun", "pol", "dan" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1936", "notAfter": "1958" }, "hands": { "count": "unknown", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "files", "count": 252 }, { "metric": "characters", "count": 540645 }, { "metric": "lines", "count": 9203 } ], "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_pid": "8239384d8" }, "3f3c2f48e": { "authors": [ { "name": "Simon", "orcid": "0000-0001-9094-4475", "roles": [ "transcriber", "project-manager", "quality-control", "support" ], "surname": "Gabay" }, { "name": "Jessica", "roles": [ "transcriber" ], "surname": "Da Silva Fernandes" }, { "name": "Myriam", "roles": [ "transcriber" ], "surname": "Perregaux" } ], "automatically-aligned": false, "characters": { "members": [ "e", "t", "o", "n", "a", "i", "r", "s", "h", "d", "l", "c", "u", "m", "f", "g", "p", ",", "y", "w", "b", "v", ".", "k", "1", "I", "\u00ac", "C", "S", "T", "-", "9", "A", ";", "8", "M", "x", "4", "2", "/", "6", "N", "G", "R", "D", "q", "0", "\"", "H", "E", "5", "z", "P", "W", "U", "7", "(", "j", ")", "3", "B", "'", "\u2019", "L", ":", "Y", "O", "V", "Q", "\u2013", "?", "F", "J", "!", "K", "\u201c", "[", "]", "X", "Z", "\u0301", "\u201d", "\u2014" ], "mode": "NFD" }, "citation-file-link": "https://github.com/FoNDUE-HTR/FONDUE-EN-PRINT-20/blob/master/CITATION.cff", "description": "Various prints (academic, archives, novels\u2026)", "format": "Alto-XML", "hands": { "count": "unknown", "precision": "exact" }, "institutions": [], "language": [ "eng" ], "license": { "name": "CC-BY 4.0", "url": 
"https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://github.com/FoNDUE-HTR", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1999", "notBefore": "1900" }, "title": "FONDUE-EN-PRINT-20", "transcription-guidelines": "SegmOnto", "url": "https://github.com/FoNDUE-HTR/FONDUE-EN-PRINT-20", "volume": [ { "count": 82834, "metric": "characters" }, { "count": 30, "metric": "files" }, { "count": 1728, "metric": "lines" }, { "count": 72, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Gabay, Simon and Perregaux, Myriam and Da Silva Fernandes, Jessica},\nmonth = {12},\ntitle = {FONDUE-EN-PRINT-20},\nurl = {https://github.com/FoNDUE-HTR/FONDUE-EN-PRINT-20},\nyear = {2023}\n}\n", "_apa": "Gabay S., Perregaux M., Da Silva Fernandes J. (2023). FONDUE-EN-PRINT-20 (version 1.0). 
URL: https://github.com/FoNDUE-HTR/FONDUE-EN-PRINT-20\n", "_pid": "3f3c2f48e" }, "5d6e6d6d8": { "authors": [ { "name": "Simon", "orcid": "0000-0001-9094-4475", "roles": [ "transcriber", "project-manager", "quality-control", "support" ], "surname": "Gabay" }, { "name": "Carmen", "orcid": "0009-0004-1508-9076", "roles": [ "transcriber" ], "surname": "Carrasco Luj\u00e1n" } ], "automatically-aligned": false, "characters": { "members": [ "e", "a", "o", "s", "n", "r", "i", "l", "d", "u", "t", "c", "m", "p", ".", "\u0301", ",", "b", "g", "y", "q", "h", "v", "\u00ac", "f", "j", "z", "A", "E", ";", "\u2013", "!", "\u0303", "S", "x", "I", "P", "C", "L", "B", "U", "D", "R", ":", "T", "?", "O", "N", "0", "H", "Y", "\u00bf", "M", "V", "\u00a1", "1", "J", "2", "\u2014", "\"", "G", "F", "k", "8", "7", "4", "5", "-", "Q", "6", "3", "\u0300", "K", "9", "(", ")", "\u0308", "X", "\u00bb", "W", "[", "]", "Z", "&", "w", "*", "\u00a7", "\u00a0", "\u00b0", "\u01dd", "\u00ab" ], "mode": "NFD" }, "citation-file-link": "https://github.com/FoNDUE-HTR/FONDUE-ES-PRINT-19/blob/master/CITATION.cff", "description": "Novels written in Spanish", "format": "Alto-XML", "hands": { "count": "unknown", "precision": "exact" }, "institutions": [], "language": [ "spa" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://github.com/FoNDUE-HTR", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1899", "notBefore": "1800" }, "title": "FONDUE-ES-PRINT-19", "transcription-guidelines": "SegmOnto", "url": "https://github.com/FoNDUE-HTR/FONDUE-ES-PRINT-19", "volume": [ { "count": 64038, "metric": "characters" }, { "count": 48, "metric": "files" }, { "count": 1668, "metric": "lines" }, { "count": 129, "metric": "regions" } ], "_bibtex": 
"@misc{YourReferenceHere,\nauthor = {Gabay, Simon and Carrasco Luj\u00e1n, Carmen},\nmonth = {2},\ntitle = {FONDUE-ES-PRINT-19},\nurl = {https://github.com/FoNDUE-HTR/FONDUE-ES-PRINT-19},\nyear = {2024}\n}\n", "_apa": "Gabay S., Carrasco Luj\u00e1n C. (2024). FONDUE-ES-PRINT-19 (version 1.0). URL: https://github.com/FoNDUE-HTR/FONDUE-ES-PRINT-19\n", "_pid": "5d6e6d6d8" }, "625707f6d": { "authors": [ { "name": "Peter", "roles": [ "transcriber" ], "surname": "Nahon" }, { "name": "Marco", "roles": [ "transcriber" ], "surname": "Cicchini" }, { "name": "Yvan", "roles": [ "transcriber" ], "surname": "Jaureguy" }, { "name": "Simon", "orcid": "0000-0001-9094-4475", "roles": [ "transcriber", "project-manager", "quality-control", "support" ], "surname": "Gabay" }, { "name": "Loraine", "orcid": "0000-0002-9598-9151", "roles": [ "transcriber" ], "surname": "Chappuis" } ], "automatically-aligned": false, "characters": { "members": [ "e", "a", "s", "r", "t", "n", "u", "i", "o", "l", "d", "c", "m", "p", "v", "\u0301", ".", ",", "q", "h", "f", "g", "b", "'", "y", "L", "M", "C", "S", "x", "j", "E", "1", "z", "\u0300", "I", "\u2019", "\u0302", "2", "J", "+", "D", "V", "\u00ac", "\u02b3", "^", "P", ":", "4", "3", "X", "R", "7", "A", "\u0308", "B", "6", ";", "5", "T", "G", "9", "\u1d49", "0", "8", "N", "\u2014", "\u0327", "O", "F", "-", "\u1d57", "?", "\u1d48", "Q", "k", "H", "\u27e6", "\u27e7", "[", "]", "\u0153", "\u02e2", "\u02e1", "\u1d47", "Z", "\u00a0", "W", "\u03b1", "w", "U", "\u0303", "(", ")", "\u0313", "\u03bf", "\u03bd", "&", "K", "\u2071", "\u03bc", "\u03c9", "\u03c4", "\u03b4", "\u03b5", "\u00b0", "Y", "\u0304", "\u03c1", "\u03c6", "{", "\u03a8", "\u03b9", "\u03c5", "\u03c0", "\u03bb", "$", "/" ], "mode": "NFD" }, "citation-file-link": "https://github.com/FoNDUE-HTR/FONDUE-FR-MSS-18/blob/master/CITATION.cff", "description": "French Manuscripts of the 18th", "format": "Alto-XML", "hands": { "count": "unknown", "precision": "exact" }, "institutions": [], "language": [ 
"fra" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://github.com/FoNDUE-HTR", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1799", "notBefore": "1700" }, "title": "FONDUE-FR-MSS-18", "transcription-guidelines": "SegmOnto", "url": "https://github.com/FoNDUE-HTR/FONDUE-FR-MSS-18", "volume": [ { "count": 232519, "metric": "characters" }, { "count": 228, "metric": "files" }, { "count": 6446, "metric": "lines" }, { "count": 709, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Gabay, Simon and Nahon, Peter and Cicchini, Marco and Jaureguy, Yvan and Chappuis, Loraine},\nmonth = {11},\ntitle = {FoNDUE-FR-MSS-18},\nurl = {https://github.com/FoNDUE-HTR/FONDUE-FR-MSS-18},\nyear = {2023}\n}\n", "_apa": "Gabay S., Nahon P., Cicchini M., Jaureguy Y., Chappuis L. (2023). FoNDUE-FR-MSS-18 (version 1.0). URL: https://github.com/FoNDUE-HTR/FONDUE-FR-MSS-18\n", "_pid": "625707f6d" }, "e7af0e31e": { "authors": [ { "name": "Gabay", "orcid": "0000-0001-9094-4475", "roles": [ "transcriber", "project-manager", "quality-control", "support" ], "surname": "Simon" } ], "citation-file-link": "https://github.com/FoNDUE-HTR/FONDUE-FR-PRINT-16/blob/master/CITATION.cff", "description": " Transcriptions of French 16th c. 
prints ", "format": "Alto-XML", "hands": { "count": "unknown", "precision": "exact" }, "language": [ "fra" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://github.com/FoNDUE-HTR/", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1600", "notBefore": "1500" }, "title": "FONDUE-FR-PRINT-16", "transcription-guidelines": "SegmOnto", "url": "https://github.com/FoNDUE-HTR/FONDUE-FR-PRINT-16", "volume": [ { "count": 504656, "metric": "characters" }, { "count": 930, "metric": "files" }, { "count": 17817, "metric": "lines" }, { "count": 2829, "metric": "regions" } ], "automatically-aligned": false, "_pid": "e7af0e31e" }, "7c7c90001": { "authors": [ { "name": "Simon", "orcid": "0000-0001-9094-4475", "roles": [ "project-manager", "quality-control", "support" ], "surname": "Gabay" }, { "name": "Sophie", "orcid": "0009-0005-6841-0158", "roles": [ "transcriber" ], "surname": "Dolto" } ], "automatically-aligned": false, "characters": { "members": [ "e", "a", "s", "i", "t", "r", "n", "u", "l", "o", "d", "c", "p", "m", "\u0301", ",", ".", "v", "\u2019", "g", "f", "b", "q", "h", "\u0300", "\u0302", "x", "j", "L", "y", "-", "I", "'", "\u2014", "A", "G", "E", "M", "P", "C", "B", "J", "D", "z", "\u0327", "S", "!", "T", "?", "\u00ac", "V", ";", "U", "O", "R", "Q", ":", "1", "k", "F", "H", "\u0153", "0", "(", ")", "\u201c", "2", "N", "6", "9", "8", "5", "\u0308", "3", "w", "W", "4", "Y", "\u201d", "\u00a0", "7", "Z", "*", "/", "K", "\"", "\u00ab", "\u00bb" ], "mode": "NFD" }, "citation-file-link": "https://github.com/FoNDUE-HTR/FONDUE-FR-PRINT-20/blob/master/CITATION.cff", "description": "French novels", "format": "Alto-XML", "hands": { "count": "unknown", "precision": "exact" }, "institutions": [], "language": [ "fra" ], "license": { 
"name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://github.com/FoNDUE-HTR", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1999", "notBefore": "1900" }, "title": "FONDUE-FR-PRINT-20", "transcription-guidelines": "SegmOnto", "url": "https://github.com/FoNDUE-HTR/FONDUE-FR-PRINT-20", "volume": [ { "count": 81599, "metric": "characters" }, { "count": 55, "metric": "files" }, { "count": 1604, "metric": "lines" }, { "count": 64, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Gabay, Simon and Dolto, Sophie},\nmonth = {2},\ntitle = {FONDUE-FR-PRINT-20},\nurl = {https://github.com/FoNDUE-HTR/FONDUE-FR-PRINT-20},\nyear = {2024}\n}\n", "_apa": "Gabay S., Dolto S. (2024). FONDUE-FR-PRINT-20 (version 1.0). URL: https://github.com/FoNDUE-HTR/FONDUE-FR-PRINT-20\n", "_pid": "7c7c90001" }, "ced371a4c": { "authors": [ { "name": "Simon", "orcid": "0000-0001-9094-4475", "roles": [ "transcriber", "project-manager", "quality-control", "support" ], "surname": "Gabay" }, { "name": "Maddalena", "roles": [ "transcriber" ], "surname": "Zaglio" } ], "automatically-aligned": false, "characters": { "members": [ "e", "a", "i", "o", "r", "n", "t", "l", "s", "c", "d", "u", "p", "m", "v", ",", "g", "h", "f", "b", ".", "z", "\u0300", "\u00ac", "q", "I", "-", "C", "A", "'", "\u2019", "M", "P", "E", "\"", "S", ";", "L", "=", "T", "R", "O", "D", "V", "G", ":", "N", "1", "!", "B", ")", "\u2014", "4", "(", "F", "[", "]", "Q", "2", "?", "0", "3", "9", "5", "U", "\u00b0", "\u2b2a", "6", "y", "Z", "k", "\u15c5", "K", "x", "\u00a7", "H", "8", "X", "7", "W", "\u2013", "^", "\u201c", "\u1455", "\u15de", "w" ], "mode": "NFD" }, "citation-file-link": "https://github.com/FoNDUE-HTR/FONDUE-IT-PRINT-20/blob/master/CITATION.cff", "description": "Archives 
and novels", "format": "Alto-XML", "hands": { "count": "unknown", "precision": "exact" }, "institutions": [], "language": [ "ita" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://github.com/FoNDUE-HTR", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1999", "notBefore": "1900" }, "title": "FONDUE-IT-PRINT-20", "transcription-guidelines": "SegmOnto", "url": "https://github.com/FoNDUE-HTR/FONDUE-IT-PRINT-20", "volume": [ { "count": 54628, "metric": "characters" }, { "count": 28, "metric": "files" }, { "count": 1150, "metric": "lines" }, { "count": 67, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Gabay, Simon and Zaglio, Maddalena},\nmonth = {12},\ntitle = {FONDUE-IT-PRINT-20},\nurl = {https://github.com/FoNDUE-HTR/FONDUE-IT-PRINT-20},\nyear = {2023}\n}\n", "_apa": "Gabay S., Zaglio M. (2023). FONDUE-IT-PRINT-20 (version 1.0). 
URL: https://github.com/FoNDUE-HTR/FONDUE-IT-PRINT-20\n", "_pid": "ced371a4c" }, "dd2357d22": { "authors": [ { "name": "Gabay", "orcid": "0000-0001-9094-4475", "roles": [ "transcriber", "project-manager", "quality-control", "support" ], "surname": "Simon" }, { "name": "Joyeux-Prunel", "orcid": "0000-0003-1046-7002", "roles": [ "transcriber" ], "surname": "B\u00e9atrice" }, { "name": "Rizzello", "orcid": "0000-0003-0131-192X", "roles": [ "transcriber" ], "surname": "Martina" }, { "name": "Berlincourt", "orcid": "0000-0001-5739-8839", "roles": [ "transcriber" ], "surname": "Val\u00e9ry" }, { "name": "Rizzi", "orcid": "0000-0002-8542-7091", "roles": [ "transcriber" ], "surname": "Elena Maria" }, { "affiliation": "Ca' Foscari University", "name": "Tesser", "orcid": "0000-0001-9553-1100", "roles": [ "transcriber" ], "surname": "Stefania" }, { "name": "Bukvic", "roles": [ "transcriber" ], "surname": "Victoria" }, { "name": "Diaz", "roles": [ "transcriber" ], "surname": "Jaime" }, { "name": "Aebi", "roles": [ "transcriber" ], "surname": "Guillaume" }, { "name": "Bickel", "roles": [ "transcriber" ], "surname": "Raoul" } ], "characters": { "members": [ "e", "n", ".", "r", "i", "a", "u", "t", "l", "s", "0", "o", "h", "d", "1", "c", "2", "m", "g", "5", "\u0308", "3", "f", "b", ",", "M", "B", "4", "S", "A", "6", "F", "G", "7", "8", "v", "p", ")", "(", "L", "9", "z", "P", "k", "R", "V", "D", "K", "y", "W", "E", "H", "C", "\u2013", "\u0300", "w", "J", "T", "Z", "\u0301", "-", "N", "I", "\u2014", "q", "O", "U", "\u00a0", "\u0302", "\u2019", "x", "j", "\"", "\u00bb", "\u00ac", ";", "\u0153", "X", "\u0327", "Q", "'", ":", "\u00df", "\u00ab", "?", "\u00a7", "Y", "\u00e6", "[", "]", "/", "\u2020", "!", "\u201e", "\u201c", "\u2026", "&" ], "mode": "NFD" }, "citation-file-link": "https://raw.githubusercontent.com/FoNDUE-HTR/FONDUE-MLT-ART/main/CITATION.cff", "description": "Swiss art exhibitions catalogues", "format": "Alto-XML", "hands": { "count": "1", "precision": "exact" }, 
"institutions": [], "language": [ "deu" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1961", "notBefore": "1842" }, "title": "FONDUE-MLT-ART", "transcription-guidelines": "No segmentation, only transcription.", "url": "https://github.com/FoNDUE-HTR/FONDUE-MLT-ART", "volume": [ { "count": 141786, "metric": "characters" }, { "count": 215, "metric": "files" }, { "count": 5664, "metric": "lines" }, { "count": 60, "metric": "pages" }, { "count": 215, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Joyeux-Prunel, B\u00e9atrice and Gabay, Simon and Rizzello, Martina and Berlincourt, Val\u00e9ry and Rizzi, Elena Maria and Tesser, Stefania and Bukvic, Victoria and Diaz, Jaime and Aebi, Guillaume and Bickel, Raoul},\nmonth = {11},\ntitle = {FONDUE-MLT-ART},\nurl = {https://github.com/FoNDUE-HTR/FONDUE-MLT-ART},\nyear = {2023}\n}\n", "_apa": "Joyeux-Prunel B., Gabay S., Rizzello M., Berlincourt V., Rizzi E.M., Tesser S., Bukvic V., Diaz J., Aebi G., Bickel R. (2023). FONDUE-MLT-ART (version 1.0). 
URL: https://github.com/FoNDUE-HTR/FONDUE-MLT-ART\n", "_pid": "dd2357d22" }, "74eadcb57": { "authors": [ { "name": "Pradier", "orcid": "0000-0002-3476-7248", "roles": [ "transcriber" ], "surname": "Fr\u00e9d\u00e9rine" }, { "name": "Gabay", "orcid": "0000-0001-9094-4475", "roles": [ "transcriber", "project-manager", "quality-control", "support" ], "surname": "Simon" }, { "name": "Kervegan", "orcid": "0000-0003-2821-8821", "roles": [ "transcriber" ], "surname": "Paul" }, { "name": "Jan\u00e8s", "orcid": "0000-0002-8971-6173", "roles": [ "transcriber" ], "surname": "Juliette" }, { "name": "S\u00e1nchez Oeconomo", "orcid": "0000-0002-8591-5394", "roles": [ "transcriber" ], "surname": "Esteban" } ], "citation-file-link": "https://github.com/FoNDUE-HTR/FONDUE-MLT-CAT/blob/main/CITATION.cff", "description": "Ground truth for 19th- and 20th-century sale and exhibition catalogues, mainly but not exclusively printed in France.", "format": "Alto-XML", "hands": { "count": "unknown", "precision": "exact" }, "institutions": [], "language": [ "por", "fra", "ita" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://github.com/FoNDUE-HTR", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1972", "notBefore": "1818" }, "title": "FONDUE-MLT-CAT", "transcription-guidelines": "Segmentation includes an extra zone `CustomZone: entry`", "url": "https://github.com/FoNDUE-HTR/FONDUE-MLT-CAT", "volume": [ { "count": 1285120, "metric": "characters" }, { "count": 1381, "metric": "files" }, { "count": 43114, "metric": "lines" }, { "count": 10713, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Pradier, Frederine and Gabay, Simon and Jan\u00e8s, Juliette and S\u00e1nchez Oeconomo, Esteban and Kervegan, 
Paul},\nmonth = {10},\ntitle = {FoNDUE - Datasets for historical catalogues},\nurl = {https://github.com/FoNDUE-HTR/FONDUE-MLT-CAT},\nyear = {2022}\n}\n", "_apa": "Pradier F., Gabay S., Jan\u00e8s J., S\u00e1nchez Oeconomo E., Kervegan P. (2022). FoNDUE - Datasets for historical catalogues (version 0.9). URL: https://github.com/FoNDUE-HTR/FONDUE-MLT-CAT\n", "_pid": "74eadcb57" }, "dcb8b4eaf": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "FoNDUE_Kunsthistorisches-UZH_Archivdatenbank", "url": "https://github.com/FoNDUE-HTR/FoNDUE_Kunsthistorisches-UZH_Archivdatenbank", "authors": [ { "name": "Pauline", "surname": "Jacsont", "orcid": "0000-0002-6296-3246", "roles": [ "project-manager", "transcriber", "aligner", "quality-control" ] }, { "name": "Simon", "surname": "Gabay", "orcid": "0000-0001-9094-4475", "roles": [ "project-manager", "quality-control", "support" ] }, { "name": "Tristan", "surname": "Weddigen", "orcid": "0000-0002-4609-8950", "roles": [ "support" ] } ], "institutions": [], "description": "HTR data made with the Kunsthistorisches UZH corpus.", "project-name": "FoNDUE", "project-website": "https://www.unige.ch/lettres/humanites-numeriques/recherche/projets-de-la-chaire/fondue", "language": [ "deu", "fra", "ita" ], "production-software": "eScriptorium + Kraken", "script": [ { "iso": "Latn" } ], "script-type": "evenly-mixed", "time": { "notBefore": "1900", "notAfter": "1999" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "pages", "count": 1100 } ], "citation-file-link": "https://github.com/FoNDUE-HTR/FoNDUE_Kunsthistorisches-UZH_Archivdatenbank/blob/main/CITATION.cff", "transcription-guidelines": "The transcription is strictly diplomatic: no abbreviations are resolved. 
Items that are crossed out or struck through will be transcribed with a \"\u20ac\".", "automatically-aligned": false, "_pid": "dcb8b4eaf" }, "70e2b99f0": { "authors": [ { "name": "Gabay", "roles": [ "project-manager" ], "surname": "Simon" }, { "name": "Pinche", "roles": [ "project-manager" ], "surname": "Ariane" }, { "name": "Leroy", "roles": [ "transcriber" ], "surname": "No\u00e9" }, { "name": "Christensen", "roles": [ "support" ], "surname": "Kelly" } ], "characters": { "members": [ "e", "i", "s", "t", "u", "n", "a", "r", "o", "l", "d", "c", "m", "p", "q", "f", "g", ".", "\u0303", "h", "b", "z", "y", "I", "x", "\u204a", ",", "R", "E", "C", "\u033e", "Q", "L", "S", "A", "D", "M", "\u0363", "\ua751", "\u0365", "P", "\ua76f", "T", "N", "\u00b6", "O", "B", "\u0364", "U", "-", "1", "\ua770", "\u1dd1", "\u033d", "2", "3", "\u1e9c", "F", "\u27e6", "\u27e7", "6", "\u0127", "\ua753", "7", "4", "\u0368", "9", "8", ";", "G", "0", "\u0366", "5", "H", "'", "\u0300", "\u0142", "\u0111", "\u0301", "\u036b", "\u2038", "&", "k", "\u00b0", "\u1e9e", "\u036c", "\u1de4", "K", "[", "]", "\u036f", "\u0327", "(", ")", "Y", "Z", ":", "\u0367", "\u1de0", "X" ], "mode": "NFD" }, "citation-file-link": "https://github.com/Gallicorpora/HTR-MSS-15e-Siecle/CITATION.cff", "description": "Corpus d'entrainement pour l'HTR compos\u00e9 de manuscrits fran\u00e7ais du 15e s.", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "estimated" }, "language": [ "frm", "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "Gallicorpora", "project-website": "https://github.com/Gallicorpora", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1500", "notBefore": "1400" }, "title": "Donn\u00e9es HTR manuscrits du 15e si\u00e8cle", "transcription-guidelines": "Les 
normes de transcription suivent les pr\u00e9conisations du projet CREMMALAB : https://cremmalab.hypotheses.org", "url": "https://github.com/Gallicorpora/HTR-MSS-15e-Siecle", "volume": [ { "count": 169207, "metric": "characters" }, { "count": 85, "metric": "files" }, { "count": 5937, "metric": "lines" }, { "count": 458, "metric": "regions" } ], "automatically-aligned": false, "_pid": "70e2b99f0" }, "4b2e8b703": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Donn\u00e9es imprim\u00e9s du 16e si\u00e8cle", "description": "Corpus d'entrainement pour l'HTR constitu\u00e9 d'imprim\u00e9s du 16e si\u00e8cle", "url": "https://github.com/Gallicorpora/HTR-imprime-16e-siecle", "authors": [ { "name": "Gabay", "surname": "Simon", "roles": [ "project-manager" ] }, { "name": "Pinche", "roles": [ "project-manager" ], "surname": "Ariane" }, { "name": "Vlachou-Efstathiou", "surname": "malamatenia", "roles": [ "transcriber" ] }, { "name": "Christensen", "surname": "Kelly", "roles": [ "support" ] } ], "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "estimated" }, "language": [ "frm", "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "project-name": "Gallicorpora", "project-website": "https://github.com/Gallicorpora", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1599", "notBefore": "1500" }, "transcription-guidelines": "Les normes de transcription suivent les pr\u00e9conisations du projet Gallicorpora", "volume": [ { "metric": "characters", "count": 186202 }, { "metric": "files", "count": 180 }, { "metric": "lines", "count": 4918 }, { "metric": "regions", "count": 591 } ], "citation-file-link": "https://github.com/Gallicorpora/HTR-imprime-16e-siecle/CITATION.cff", "production-software": "eScriptorium + Kraken", "characters": { "mode": "NFD", "members": [ "e", "u", "r", "a", "n", "i", "t", "o", "l", "s", "\u017f", "d", "c", "m", "p", 
",", "q", "y", "v", "f", "g", "b", "h", ".", "\u2019", "&", "E", "x", "'", "z", "\u0301", "\u0300", "A", "\u00ac", "\u0303", "D", "C", "R", ":", "L", "I", "S", "P", "N", "M", "O", "Q", "T", "V", "G", "H", "B", "F", "-", "\u0327", "j", "?", "(", "\u0308", ")", "\u00bb", "1", "\u0153", "\u00b6", "!", "U", "2", "X", ";", "9", "Y", "4", "3", "\u00df", "5", "\"", "7", "J", "8", "\u00e6", "\ua770", "6", "0", "\u0302", "\u02b3", "\u204a", "Z", "\u00ab", "*", "\ua757", "\ua753", "\u00a0", "\u204b", "\u0399", "\ua751", "]", "\u0365", "\u1d49", "\u0395", "[", "\u03a4", "/" ] }, "automatically-aligned": false, "_pid": "4b2e8b703" }, "3b032d4e1": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Imprim\u00e9s 17e si\u00e8cle", "description": "Corpus d'entrainement pour l'HTR compos\u00e9 d'imprim\u00e9s fran\u00e7ais du 17e s.", "url": "https://github.com/Gallicorpora/HTR-imprime-17e-siecle", "authors": [ { "name": "Gabay", "surname": "Simon", "roles": [ "project-manager" ] }, { "name": "Pinche", "surname": "Ariane", "roles": [ "project-manager" ] }, { "name": "Fabert", "surname": "Eliott", "roles": [ "transcriber" ] }, { "name": "Vlachou-Efstathiou", "surname": "malamatenia", "roles": [ "transcriber" ] }, { "name": "Christensen", "surname": "Kelly", "roles": [ "support" ] } ], "project-name": "Gallicorpora", "project-website": "https://github.com/Gallicorpora", "language": [ "frm", "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1600", "notAfter": "1699" }, "hands": { "count": "1-per-folder", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 255981 }, { "metric": "files", "count": 327 }, { "metric": "lines", "count": 8950 }, { "metric": "regions", "count": 1185 } ], "transcription-guidelines": "Les normes de transcription suivent les pr\u00e9conisations du 
projet gallicorpora", "citation-file-link": "https://github.com/Gallicorpora/HTR-imprime-17e-siecle/CITATION.cff", "production-software": "eScriptorium + Kraken", "characters": { "mode": "NFD", "members": [ "e", "u", "r", "a", "n", "i", "t", "o", "l", "s", "\u017f", "d", "c", "m", "p", ",", "v", "q", ".", "f", "g", "b", "E", "\u2019", "h", "y", "\u0301", "A", "&", "'", "S", "I", "x", "\u00ac", "L", "C", "R", "P", "D", "\u0300", "M", "V", "T", "O", "N", "z", ":", "Q", "j", "-", "F", "G", "\u0303", "B", ";", "H", "\u0308", "1", "\u0302", "\u0327", "2", "?", "3", "\u0153", "4", "5", "Y", "U", "Z", "6", "7", "8", "0", "X", "J", "9", "(", "\u00e6", ")", "\u00c6", "\u03b9", "\u03b1", "!", "\u00df", "\u03bf", "\u03bd", "\u03b5", "\u03c1", "\u0313", "\u03c5", "\u03ba", "*", "\u03c3", "\u03c4", "\u03c9", "[", "]", "\ua770", "K", "\u0391", "\u03c7", "\u03c2", "\u03c0", "\u03b3", "\u0328", "\u03bc", "k", "\u0342", "\u039d", "\u0392", "\u03bb", "\u03a3", "\u039a", "\u03b7", "\u03b8", "W", "\u0152", "\u03b4", "\u03a4", "\u0345", "\u00bb", "\u1d49", "\u02e1", "\u0367", "\u0396", "\u03b2", "\u0314", "\u0307", "\u00b0", "w", "\u1e9e", "\u03a6", "\u039b", "\u03a7", "\u03c6", "\u0399", "\u02b3", "\u1d50" ] }, "automatically-aligned": false, "_pid": "3b032d4e1" }, "5d92a9eb8": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Donn\u00e9es imprim\u00e9s du 18e si\u00e8cle", "description": "Corpus d'entrainement pour l'HTR constitu\u00e9 d'imprim\u00e9s du 18e si\u00e8cle", "url": "https://github.com/Gallicorpora/HTR-imprime-18e-siecle", "authors": [ { "name": "Gabay", "roles": [ "project-manager" ], "surname": "Simon" }, { "name": "Pinche", "roles": [ "project-manager" ], "surname": "Ariane" }, { "name": "Fabert", "roles": [ "transcriber" ], "surname": "Eliott" }, { "name": "Christensen", "roles": [ "support" ], "surname": "Kelly" } ], "project-name": "Gallicorpora", "project-website": "https://github.com/Gallicorpora", "language": [ "fra" ], 
"script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1700", "notAfter": "1799" }, "hands": { "count": "1-per-folder", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 255981 }, { "metric": "files", "count": 327 }, { "metric": "lines", "count": 8950 }, { "metric": "regions", "count": 1185 } ], "transcription-guidelines": "Les normes de transcription suivent les pr\u00e9conisations du projet gallicorpora", "citation-file-link": "https://github.com/Gallicorpora/HTR-imprime-18e-siecle/CITATION.cff", "production-software": "eScriptorium + Kraken", "characters": { "mode": "NFD", "members": [ "e", "u", "r", "a", "n", "i", "t", "o", "l", "s", "\u017f", "d", "c", "m", "p", ",", "v", "q", ".", "f", "g", "b", "E", "\u2019", "h", "y", "\u0301", "A", "&", "'", "S", "I", "x", "\u00ac", "L", "C", "R", "P", "D", "\u0300", "M", "V", "T", "O", "N", "z", ":", "Q", "j", "-", "F", "G", "\u0303", "B", ";", "H", "\u0308", "1", "\u0302", "\u0327", "2", "?", "3", "\u0153", "4", "5", "Y", "U", "Z", "6", "7", "8", "0", "X", "J", "9", "(", "\u00e6", ")", "\u00c6", "\u03b9", "\u03b1", "!", "\u00df", "\u03bf", "\u03bd", "\u03b5", "\u03c1", "\u0313", "\u03c5", "\u03ba", "*", "\u03c3", "\u03c4", "\u03c9", "[", "]", "\ua770", "K", "\u0391", "\u03c7", "\u03c2", "\u03c0", "\u03b3", "\u0328", "\u03bc", "k", "\u0342", "\u039d", "\u0392", "\u03bb", "\u03a3", "\u039a", "\u03b7", "\u03b8", "W", "\u0152", "\u03b4", "\u03a4", "\u0345", "\u00bb", "\u1d49", "\u02e1", "\u0367", "\u0396", "\u03b2", "\u0314", "\u0307", "\u00b0", "w", "\u1e9e", "\u03a6", "\u039b", "\u03a7", "\u03c6", "\u0399", "\u02b3", "\u1d50" ] }, "automatically-aligned": false, "_pid": "5d92a9eb8" }, "7dde3f71f": { "authors": [ { "name": "Gabay", "roles": [ "project-manager" ], "surname": "Simon" }, { "name": "Pinche", "roles": [ "project-manager" ], "surname": 
"Ariane" }, { "name": "Leroy", "roles": [ "transcriber" ], "surname": "No\u00e9" }, { "name": "Christensen", "roles": [ "support" ], "surname": "Kelly" } ], "characters": { "members": [ "e", "s", "u", "t", "a", "i", "r", "o", "n", "l", "d", "c", "m", "p", "\u0303", "f", "q", "g", "y", "h", "b", ".", "z", "\u204a", "x", "E", "-", ",", "\u00b6", "L", "\u0365", "D", "C", ";", "\u1de4", "I", "\ua770", "Q", "A", "S", "\ua751", "P", "M", "O", "T", "U", "N", "F", "R", "\ua753", "B", "G", "\ua76f", "\u033e", "H", "\u1dd1", "\u036c", "\u030c", ":", "(", "[", "]", "v", "J", "\ua758", ")", "k", "\ua759", "\u0363", "V", "4", "\u0366", "w", "\u0368", "\u0364", "\u0399", "\u0327", "1", "9", "7", "\u0336", "'", "\u0301", "|" ], "mode": "NFD" }, "citation-file-link": "https://github.com/Gallicorpora/HTR-incunable-15e-siecle/CITATION.cff", "description": "Corpus d'entrainement pour l'HTR compos\u00e9 d'incunable fran\u00e7ais du 15e s.", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "estimated" }, "language": [ "frm", "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "Gallicorpora", "project-website": "https://github.com/Gallicorpora", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1500", "notBefore": "1400" }, "title": "Donn\u00e9es HTR incunables du 15e si\u00e8cle", "transcription-guidelines": "Les normes de transcription suivent les pr\u00e9conisations du projet CREMMALAB : https://cremmalab.hypotheses.org", "url": "https://github.com/Gallicorpora/HTR-incunable-15e-siecle", "volume": [ { "count": 245094, "metric": "characters" }, { "count": 149, "metric": "files" }, { "count": 7608, "metric": "lines" }, { "count": 535, "metric": "regions" } ], "automatically-aligned": false, "_pid": "7dde3f71f" }, "70e75eb5c": { "authors": [ { 
"name": "Emmanuelle", "roles": [ "project-manager" ], "surname": "de Champs" }, { "name": "Florence", "roles": [ "project-manager", "quality-control", "transcriber" ], "surname": "Clavaud" }, { "name": "Pauline", "roles": [ "project-manager", "quality-control", "transcriber" ], "surname": "Charbonnier" }, { "name": "Christine", "roles": [ "project-manager", "quality-control", "support" ], "surname": "Nougaret" }, { "name": "Alix", "roles": [ "aligner", "project-manager", "quality-control", "support" ], "surname": "Chagu\u00e9" }, { "name": "Thibault", "roles": [ "aligner", "project-manager", "quality-control", "support" ], "surname": "Cl\u00e9rice" }, { "name": "Falcoz", "roles": [ "aligner" ], "surname": "Elsa" }, { "name": "Marie-Fran\u00e7oise", "roles": [ "project-manager" ], "surname": "Limon-Bonnet" }, { "name": "Elise", "roles": [ "project-manager" ], "surname": "Wojszvzyk" }, { "name": "Sylvie", "roles": [ "project-manager", "quality-control", "support" ], "surname": "Dechavanne" }, { "roles": [ "transcriber" ], "surname": "ALemoine" }, { "roles": [ "transcriber" ], "surname": "ASJPeronneau" }, { "roles": [ "transcriber" ], "surname": "Alcofrybas" }, { "roles": [ "transcriber" ], "surname": "BeaLct" }, { "roles": [ "transcriber" ], "surname": "CLbt" }, { "roles": [ "transcriber" ], "surname": "Chloelsa" }, { "roles": [ "transcriber" ], "surname": "DMichel" }, { "roles": [ "transcriber" ], "surname": "Desauthieux" }, { "roles": [ "transcriber" ], "surname": "EPerrin" }, { "roles": [ "transcriber" ], "surname": "GBMireille" }, { "roles": [ "transcriber" ], "surname": "GPINET" }, { "roles": [ "transcriber" ], "surname": "Genea78" }, { "roles": [ "transcriber" ], "surname": "JMGoux" }, { "roles": [ "transcriber" ], "surname": "Jideuxhemme" }, { "roles": [ "transcriber" ], "surname": "LBIsabelle" }, { "roles": [ "transcriber" ], "surname": "Lamotte" }, { "roles": [ "transcriber" ], "surname": "MFGarreau" }, { "roles": [ "transcriber" ], "surname": "MIna" }, { 
"roles": [ "transcriber" ], "surname": "Maniet" }, { "roles": [ "transcriber" ], "surname": "MarionJo" }, { "roles": [ "transcriber" ], "surname": "PGambette" }, { "roles": [ "transcriber" ], "surname": "PPocard" }, { "roles": [ "transcriber" ], "surname": "PROMBAUT" }, { "roles": [ "transcriber" ], "surname": "PaulineTest" }, { "roles": [ "transcriber" ], "surname": "SCayeux" }, { "roles": [ "transcriber" ], "surname": "SL." }, { "roles": [ "transcriber" ], "surname": "SLespinasse" }, { "roles": [ "transcriber" ], "surname": "Silver08" }, { "roles": [ "transcriber" ], "surname": "TPell\u00e9" }, { "roles": [ "transcriber" ], "surname": "Val\u00e9rie" }, { "roles": [ "transcriber" ], "surname": "alp" }, { "roles": [ "transcriber" ], "surname": "jmorvan" }, { "roles": [ "transcriber" ], "surname": "lelia" }, { "roles": [ "transcriber" ], "surname": "majubama" }, { "roles": [ "transcriber" ], "surname": "mickael.lefevr" }, { "roles": [ "transcriber" ], "surname": "sgauthier" }, { "roles": [ "quality-control" ], "surname": "EdChamps" }, { "name": "Dani\u00e8le", "roles": [ "support" ], "surname": "Allezard" }, { "name": "Fran\u00e7oise", "roles": [ "support" ], "surname": "Auriau" }, { "name": "Sophie", "roles": [ "support" ], "surname": "Blanchard" }, { "name": "Laure", "roles": [ "support" ], "surname": "Cadars" }, { "name": "Paul", "roles": [ "support" ], "surname": "Cazin-Bernier" }, { "name": "Rosine", "roles": [ "support" ], "surname": "Cleyet-Michaud" }, { "name": "Sophie", "roles": [ "support" ], "surname": "Delinge" }, { "name": "Christiane", "roles": [ "support" ], "surname": "Demeulenaere-Douy\u00e8re" }, { "name": "Mathilde", "roles": [ "support" ], "surname": "Deuve" }, { "name": "Tristan", "roles": [ "support" ], "surname": "Girard" }, { "name": "Wilfried", "roles": [ "support" ], "surname": "Gourdon" }, { "name": "Emilie", "roles": [ "support" ], "surname": "Laffitte-Louisou" }, { "name": "Val\u00e9rie", "roles": [ "support" ], "surname": "Lem\u00e9e" 
}, { "name": "Jean-Claude", "roles": [ "support" ], "surname": "Lescure" }, { "name": "M\u00e9lisa", "roles": [ "support" ], "surname": "Locatelli" }, { "name": "Aur\u00e9lie", "roles": [ "support" ], "surname": "Massie" }, { "name": "Thomas", "roles": [ "support" ], "surname": "Olivier" }, { "name": "Fran\u00e7oise", "roles": [ "support" ], "surname": "Pinchard" }, { "name": "Tiffanie", "roles": [ "support" ], "surname": "Pitot" }, { "name": "Anais", "roles": [ "support" ], "surname": "Pontoparia" }, { "name": "Michel", "roles": [ "support" ], "surname": "Renard" }, { "name": "Thierry", "roles": [ "support" ], "surname": "Rihouey" }, { "name": "Christian", "roles": [ "support" ], "surname": "Rodriguez" }, { "name": "Konstantinos", "roles": [ "support" ], "surname": "Sifakis" }, { "name": "Marie-Th\u00e9r\u00e8se", "roles": [ "support" ], "surname": "Solignat" }, { "name": "Lucie", "roles": [ "support" ], "surname": "Vieillon" }, { "roles": [ "support" ], "surname": "SL" } ], "characters": { "members": [ "e", "a", "i", "n", "t", "s", "r", "u", "o", "l", "m", "d", "c", "p", "\u0301", "\u0300", "f", "v", "g", ",", "q", "b", ".", "\u2019", "1", "h", "M", "J", "j", "P", "C", "A", "-", "x", "L", "S", "F", "9", "y", "D", "B", "\u0302", "R", "2", "^", "4", "z", "0", "E", "V", "G", "3", "5", "T", ")", "(", "H", "6", "N", "7", "8", "I", ":", "O", ";", "Q", "\u0327", "\u00b0", "U", "\u00a0", "/", "W", "\"", "\u0308", ">", "<", "=", "\u0153", "w", "?", "_", "X", "%", "k", "*", "\u017f", "!", "Z", "&", "'", "\u2013", "K", "+" ], "mode": "NFD" }, "citation-file-link": "https://github.com/Dummy/depot-test/CITATION.cff", "description": "WWI\u2019s Poilus' testaments edited by the Archives nationales during the Testaments de Poilus project.", "format": "Alto-XML", "hands": { "count": "1-per-file", "precision": "estimated" }, "language": [ "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + 
Kraken", "project-name": "Testaments de Poilus", "project-website": "https://edition-testaments-de-poilus.huma-num.fr/", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1918", "notBefore": "1914" }, "title": " CREMMA-AN Testament De Poilus ", "transcription-guidelines": "The original transcriptions were performed on a crowdsourcing application (https://testaments-de-poilus.huma-num.fr/#!/) under the supervision of the Archives nationales de France. Only the allographic portions of the documents were transcribed. Any marginal elements added later by clerks or archivists are neither segmented nor transcribed. The segmentation follows the SegmOnto ontology. Abbreviations and misspellings were not corrected. Superscripted portions of text are preceded by ^. ", "url": "https://github.com/HTR-United/CREMMA-AN-TestamentDePoilus", "volume": [ { "count": 87726, "metric": "characters" }, { "count": 226, "metric": "files" }, { "count": 3330, "metric": "lines" }, { "count": 553, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Chagu\u00e9, Alix and Cl\u00e9rice, Thibault and Mazoue, Ana\u00efs and Van Kote, Elsa},\ntitle = {CREMMA-AN-TestamentDePoilus },\nurl = {https://github.com/HTR-United/CREMMA-AN-TestamentDePoilus}\n}\n", "_apa": "Chagu\u00e9 A., Cl\u00e9rice T., Mazoue A., Van Kote E. 
CREMMA-AN-TestamentDePoilus URL: https://github.com/HTR-United/CREMMA-AN-TestamentDePoilus\n", "_pid": "70e75eb5c" }, "92add71a0": { "authors": [ { "name": "Thibault", "orcid": "0000-0003-1852-9204", "roles": [ "project-manager", "quality-control", "support" ], "surname": "Cl\u00e9rice" }, { "name": "Alix", "orcid": "0000-0002-0136-4434", "roles": [ "project-manager", "quality-control", "support" ], "surname": "Chagu\u00e9" }, { "name": "Ana\u00efs", "roles": [ "transcriber" ], "surname": "Mazoue" } ], "automatically-aligned": false, "characters": { "members": [ "e", "r", "n", "a", "u", "o", "t", "i", "l", "\u017f", "d", "s", "c", "m", "p", "v", "y", "q", "g", "f", "b", "z", "h", "J", "/", "x", "R", "^", "L", "I", ".", "E", "\u1e9c", "\u204a", "M", "1", "\ua751", "A", "\u0301", "\u033e", "<", ">", "j", "C", "D", "3", "\ua759", "9", "V", "7", "6", "\u2019", "P", "8", "\ua750", "\u0303", "T", "(", "S", "N", ";", "Q", "\u0300", "5", "0", "U" ], "mode": "NFD" }, "citation-file-link": "https://github.com/HTR-United/CREMMA-MSS-16/CITATION.cff", "description": "Manuscripts of the 16th century", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "exact" }, "institutions": [], "language": [ "fra" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "CREMMA", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1599", "notBefore": "1500" }, "title": "CREMMA MSS 16", "transcription-guidelines": "Abr\u00e9viations conserv\u00e9es.", "url": "https://github.com/HTR-United/CREMMA-MSS-16", "volume": [ { "count": 10911, "metric": "characters" }, { "count": 9, "metric": "files" }, { "count": 244, "metric": "lines" }, { "count": 18, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Mazoue, Ana\u00efs and Cl\u00e9rice, Thibault and 
Chagu\u00e9, Alix},\nmonth = {3},\ntitle = {CREMMA-MSS-16},\nurl = {https://github.com/HTR-United/CREMMA-MSS-16},\nyear = {2024}\n}\n", "_apa": "Mazoue A., Cl\u00e9rice T., Chagu\u00e9 A. (2024). CREMMA-MSS-16 URL: https://github.com/HTR-United/CREMMA-MSS-16\n", "_pid": "92add71a0" }, "f31231d2b": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "CREMMA Manuscrits du 17e", "url": "https://github.com/HTR-United/CREMMA-MSS-17", "project-name": "CREMMA", "authors": [ { "name": "Cl\u00e9rice", "surname": "Thibault", "roles": [ "project-manager", "quality-control" ] }, { "name": "Chagu\u00e9", "surname": "Alix", "roles": [ "project-manager", "quality-control" ] }, { "name": "Faure", "surname": "Margaux", "roles": [ "transcriber" ] }, { "name": "Norindr", "surname": "Jade", "roles": [ "transcriber" ] }, { "name": "Mazoue", "surname": "Anais", "roles": [ "transcriber" ] }, { "name": "Davoury", "surname": "Baudoin", "roles": [ "transcriber" ] } ], "description": "Various Manuscripts of the 17th century", "language": [ "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1600", "notAfter": "1699" }, "hands": { "count": "1-per-folder", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 81909 }, { "metric": "files", "count": 111 }, { "metric": "lines", "count": 2245 }, { "metric": "regions", "count": 264 } ], "transcription-guidelines": "Abr\u00e9viations conserv\u00e9es.", "production-software": "eScriptorium + Kraken", "characters": { "mode": "NFD", "members": [ "e", "s", "r", "a", "n", "u", "i", "o", "t", "l", "d", "c", "m", "p", "v", "q", ".", ",", "y", "'", "f", "b", "g", "\u0301", "h", "j", "\u0303", "M", "x", "R", "z", "C", "1", "J", "^", "\u0300", "P", "L", "S", "V", "&", "A", "E", ">", "I", "<", "2", "X", "3", "T", "7", "D", "6", "]", "B", "4", "[", 
"0", "?", "-", "\u0302", "\u0308", "9", "5", ";", "G", "N", "8", ":", "F", "\u0327", ")", "(", "Q", "O", "H", "W", "\u0153", "\u2038", "\u204a", "U", "\u0304", "/", "\ua757", "+", "k", "\u00b0", "\u00a0", "w", "\u05dd", "Z", "\u03c2", "#", "\u00e6", "\ua759", "\u0363", "\u03b5", "\u03d5" ] }, "automatically-aligned": false, "_pid": "f31231d2b" }, "080333ca7": { "authors": [ { "name": "Chagu\u00e9", "roles": [ "project-manager", "quality-control" ], "surname": "Alix" }, { "name": "Cl\u00e9rice", "roles": [ "project-manager", "quality-control" ], "surname": "Thibault" }, { "name": "Norindr", "roles": [ "transcriber" ], "surname": "Jade" }, { "name": "Norindr", "roles": [ "transcriber" ], "surname": "Jade" }, { "name": "Van Kote", "roles": [ "transcriber", "aligner" ], "surname": "Elsa" }, { "name": "Faure", "roles": [ "transcriber", "aligner" ], "surname": "Margaux" } ], "characters": { "members": [ "e", "s", "a", "r", "t", "n", "u", "i", "o", "l", "d", "p", "c", "m", "v", ".", "q", "f", "\u0301", "'", ",", "g", "b", "h", "y", "x", "j", "L", "C", "\u0300", "^", "1", "M", "S", "\u0302", "z", "E", "R", ";", "2", "I", "6", "0", ">", "<", "D", "V", "J", "4", "3", "(", ")", "P", "\u0308", "5", "\u0303", "-", "7", "B", "8", "A", "[", "]", "9", "N", "F", "G", "T", "?", "X", "\u0327", "/", ":", "O", "H", "\u2019", "\u00ac", "+", "\u00a0", "\u0153", "U", "&", "\u00ab", "Q", "=", "K", "!", "k", "W", "Z", "w", "\u00b0", "\u204a", "\ua751", "\u017f", "\u2038", "#", "\u0336", "_", "Y", "\u0304", "\u00bb", "\u0366" ], "mode": "NFD" }, "description": "Manuscripts of the 18th century", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "exact" }, "language": [ "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "CREMMA", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": 
"only-manuscript", "time": { "notAfter": "1799", "notBefore": "1700" }, "title": "CREMMA Manuscrits du 18e", "transcription-guidelines": "Abr\u00e9viations conserv\u00e9es.", "url": "https://github.com/HTR-United/CREMMA-MSS-18", "volume": [ { "count": 141690, "metric": "characters" }, { "count": 125, "metric": "files" }, { "count": 4019, "metric": "lines" }, { "count": 329, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Van Kote, Elsa and Faure, Margaux and Norindr, Jade and Cl\u00e9rice, Thibault and Chagu\u00e9, Alix},\nmonth = {3},\ntitle = {CREMMA-MSS-18},\nurl = {https://github.com/HTR-United/CREMMA-MSS-18},\nyear = {2024}\n}\n", "_apa": "Van Kote E., Faure M., Norindr J., Cl\u00e9rice T., Chagu\u00e9 A. (2024). CREMMA-MSS-18 URL: https://github.com/HTR-United/CREMMA-MSS-18\n", "_pid": "080333ca7" }, "6bd8117b0": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "CREMMA Manuscrits du 19e", "url": "https://github.com/HTR-United/CREMMA-MSS-19", "project-name": "CREMMA", "authors": [ { "name": "Cl\u00e9rice", "surname": "Thibault", "roles": [ "project-manager", "quality-control" ] }, { "name": "Chagu\u00e9", "surname": "Alix", "roles": [ "project-manager", "quality-control" ] }, { "name": "Davoury", "surname": "Baudouin", "roles": [ "transcriber", "aligner" ] }, { "name": "Doat", "surname": "Soline", "roles": [ "transcriber", "aligner" ] }, { "name": "Faure", "surname": "Margaux", "roles": [ "transcriber", "aligner" ] }, { "name": "Humeau", "surname": "Maxime", "roles": [ "transcriber", "aligner" ] } ], "description": "Manuscripts of the 19th century", "language": [ "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1800", "notAfter": "1899" }, "hands": { "count": "1-per-folder", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", 
"volume": [ { "metric": "characters", "count": 55581 }, { "metric": "files", "count": 69 }, { "metric": "lines", "count": 1807 }, { "metric": "regions", "count": 167 } ], "transcription-guidelines": "Abr\u00e9viations conserv\u00e9es.", "production-software": "eScriptorium + Kraken", "characters": { "mode": "NFD", "members": [ "e", "s", "a", "i", "u", "n", "r", "t", "o", "l", "d", "m", "c", "p", "v", ",", "\u0301", "'", "q", "f", ".", "g", "b", "h", "\u0300", "j", "x", "-", "\u0302", "L", "C", "M", "y", "J", "z", "A", "D", "P", "\"", ">", "<", "E", "!", "N", "S", "Q", "1", ";", "?", ":", "R", "I", "T", "B", "V", "\u0153", "6", "O", "(", "_", ")", "2", "3", "H", "4", "^", "9", "8", "7", "F", "0", "G", "5", "\u0327", "U", "&", "[", "]", "\u00b0", "\u0308", "k", "$", "w", "X", "W", "Y", "+", "Z" ] }, "automatically-aligned": false, "_pid": "6bd8117b0" }, "b6e607ab9": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "CREMMA Manuscrits du 20e", "url": "https://github.com/HTR-United/CREMMA-MSS-20", "project-name": "CREMMA", "authors": [ { "name": "Cl\u00e9rice", "surname": "Thibault", "roles": [ "project-manager", "quality-control" ] }, { "name": "Chagu\u00e9", "surname": "Alix", "roles": [ "project-manager", "quality-control" ] } ], "description": "Manuscripts of the 20th century\n", "language": [ "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1900", "notAfter": "1999" }, "hands": { "count": "1-per-folder", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 5764 }, { "metric": "files", "count": 13 }, { "metric": "lines", "count": 224 }, { "metric": "regions", "count": 25 } ], "transcription-guidelines": "Abr\u00e9viations conserv\u00e9es.", "production-software": "eScriptorium + Kraken", "characters": { "mode": "NFKD", "members": [ "e", "a", "s", 
"n", "t", "r", "i", "u", "l", "o", "d", "c", "m", "p", "\u0301", "<", ">", "'", "v", "q", ",", ".", "\u0300", "b", "g", "h", "j", "f", "F", "J", "1", "-", "\u0302", "M", "A", "E", "x", "T", "y", "C", "D", "^", "O", "8", "N", "7", "B", "S", "0", "\u0327", "P", "G", "R", "H", "L", "9", "z", "I", "2", ":", "U", "&", "k", "+", ";", "$", "V", "\u0153", "[", "?", "]", "4", "3", "(", ")", "6" ] }, "automatically-aligned": false, "_pid": "b6e607ab9" }, "2bea975a1": { "authors": [ { "name": "Cl\\xE9rice", "orcid": "0000-0003-1852-9204", "roles": [ "transcriber", "aligner", "project-manager", "quality-control" ], "surname": "Thibault" }, { "name": "Chagu\\xE9", "orcid": "0000-0002-0136-4434", "roles": [ "project-manager" ], "surname": "Alix" }, { "name": "Vlachou Efstathiou", "orcid": "0000-0002-9397-356X", "roles": [ "transcriber", "aligner" ], "surname": "Malamatenia" } ], "characters": { "members": [ "i", "e", "t", "a", "u", "s", "\u0303", "o", "n", "r", "c", "d", "m", "l", "p", ".", "\u033e", "q", "b", "g", "f", "\u204a", "\uf1ac", "\u0363", "h", "\ua770", "\ua751", "\u0365", "x", "\u0142", "\u1dd1", "\u1de4", "\u0366", "\ua759", "\ua76f", "I", ":", "\u0364", "\u036d", "\ua775", "\ua753", "S", "\u036b", "\u00b6", "\u1e9c", "E", "U", "A", "\u0368", "C", "\u0127", "N", "Q", "y", "\ua757", "\u1d48", "D", "\u0335", "R", "P", "\u036c", "\u1ddd", "M", "T", "\ua76d", "/", "^", "2", "\u0367", "&", "z", ",", "H", "O", "\u00ac", "L", "1", "3", "4", "F", "=", "G", "\u1de0", "\u00f7", "\u2125", "5", "B", "9", "\u00d8", "\u0307", "\ua758", "6", "\u0327", "X", "8", "0", "\u1d47", "k", "7", "'", "*", "\uf2da", "w", "-", "Y", "\u0301", "\u0308", "+", "Z", "\u0111", "\u00a0", "K", "\u204b", "\u1d56", "\uf038", "\u03b9" ], "mode": "NFD" }, "description": "Ground truth for medieval latin manuscripts. 
Formerly `CREMMA-Medieval-LAT`.", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "exact" }, "institutions": [], "language": [ "lat" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "CREMMA", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1599", "notBefore": "1100" }, "title": "CREMMA Medii Aevi", "transcription-guidelines": "Not a graphetic/\"allographetique\" transcription but rather a graphemic one that preserves the sequence of letters and reduces each form to its meaning in an alphabetical system. Abbreviations are preserved (e.g. pro, pre, tironian et, \"est\" etc.), as well as abbreviative signs, ligatures are reduced to their component letters. Spaces between letters reproduce the original (e.g. in the case of a semicontinuous script). Punctuations are simplified, reducing to \":\" all two-component punctuation (e.g. punctus elevatus). Rare characters have been preserved such as \"instans\" and metric values (e.g. ounces). ", "url": "https://github.com/HTR-United/CREMMA-Medieval-LAT", "volume": [ { "count": 263222, "metric": "characters" }, { "count": 121, "metric": "files" }, { "count": 7274, "metric": "lines" }, { "count": 441, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Cl\u00e9rice, Thibault and Chagu\u00e9, Alix and Vlachou-Efstathiou, Malamatenia},\ndoi = {10.5281/zenodo.7013436},\ntitle = {CREMMA Medii Aevi},\nurl = {https://github.com/HTR-United/CREMMA-Medieval-LAT}\n}\n", "_apa": "Cl\u00e9rice T., Chagu\u00e9 A., Vlachou-Efstathiou M. 
CREMMA Medii Aevi DOI: 10.5281/zenodo.7013436 URL: https://github.com/HTR-United/CREMMA-Medieval-LAT\n", "_pid": "2bea975a1" }, "baa415760": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "CREMMA Early Modern Books", "url": "https://github.com/HTR-United/cremma-16-17-print", "project-name": "CREMMA", "authors": [ { "name": "Cl\u00e9rice", "surname": "Thibault", "roles": [ "transcriber", "project-manager" ] } ], "description": "Collection of book samples in early print forms, 16th to 17th century, in Latin and pre-orthographic French.", "language": [ "frm", "lat" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1500", "notAfter": "1779" }, "hands": { "count": "1-per-folder", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 84726 }, { "metric": "files", "count": 98 }, { "metric": "lines", "count": 2603 }, { "metric": "regions", "count": 451 } ], "sources": [ { "reference": "Omnia Andreae Alciati v.c. emblemata cum commentariis", "link": "http://pid.emory.edu/ark:/25593/b70rv" }, { "reference": "15.. \tHistoria de duobus amantibus Eurialo et Lucretia ", "link": "https://gallica.bnf.fr/ark:/12148/bpt6k533863" }, { "reference": "1520 \tEpigrammata clarissimi disertissimique viri Thomae Mori...", "link": "https://doi.org/10.3931/e-rara-74397" }, { "reference": "'1550 \tLa description de l'isle d'Utopie, o\u00fa est comprins '\n", "link": "https://gallica.bnf.fr/ark:/12148/bpt6k6566444g" }, { "reference": "1779 \tZoologia Danica, seu, Animalium Daniae et Norvegiae ", "link": "https://archive.org/details/zoologiadanicase01mlle" }, { "reference": "'L'Achileyde de Stace... 
traduction en vers, avec ...'", "link": "https://gallica.bnf.fr/view3if/ga/ark:/12148/bpt6k3103841" }, { "reference": "1681 \tVigili\u00e6 Rhetorum, Et Somnia Poetarvm, Symbolic\u00e8", "link": "http://diglib.hab.de/drucke/qun-607-5/start.htm" }, { "reference": "Aneau, Barth\u00e9lemy: Picta Poesis - Lugduni : Pesnot, 1564", "link": "http://diglib.hab.de/drucke/231-5-poet/start.htm" } ], "citation-file-link": "https://raw.githubusercontent.com/HTR-United/cremma-16-17-print/main/CITATION.CFF", "transcription-guidelines": "Kept abbreviation and transcribed long s as long s", "production-software": "eScriptorium + Kraken", "characters": { "mode": "NFD", "members": [ "e", "i", "u", "a", "t", "r", "n", "o", "l", "\u017f", "m", "s", "c", "d", "p", ",", ".", "q", "b", "g", "f", "h", "v", "A", "I", "E", "\u00ac", "&", "x", "S", "\u0301", "\u0303", "y", "\u2019", "C", "P", "\u0300", "T", "R", "M", ":", "V", "\u00e6", "L", "N", "O", "D", "\ue8bf", "z", "Q", "j", "H", "G", "B", "F", "2", "\u0308", "-", "1", "'", "\u0153", ";", "?", "(", "\u0302", ")", "7", "U", "X", "3", "\u03bf", "\u03b9", "\u03b1", "5", "6", "4", "\u03b5", "\u0327", "\u03bd", "\u03c4", "8", "\u0313", "\u03c0", "9", "!", "J", "0", "\ua770", "\u03c2", "\u03bb", "\u03c5", "Y", "\u00a7", "\ua759", "\u00c6", "\u03c3", "\u0391", "\u03c9", "]", "Z", "/", "\u03c1", "k", "\u039f", "\u039d", "\u03b7", "\u0342", "\u03bc", "\u03ba", "*", "K", "\u03a5", "\u03b4", "\u03b8", "\ua757", "\u211f", "\u0395", "\u03a1", "\u03a9", "\u03a0", "\u0399", "\u03a4", "\u03c6", "\u0142", "\u030a", "\u039c", "\u0398", "\u03a3", "\u0392", "\u039b", "\u03b3", "|", "\u00bd", "\u0330", "\u00a0", "\u0314", "\u03c7", "\u03db", "\u00df", "\u0345", "\u0393", "\u0394", "W", "\u03a7", "\u03be", "\uf1a7", "#" ] }, "automatically-aligned": false, "_pid": "baa415760" }, "632310da4": { "authors": [ { "name": "Pinche", "orcid": "0000-0002-7843-5050", "roles": [ "transcriber", "aligner", "project-manager", "quality-control", "support" ], "surname": 
"Ariane" }, { "name": "Camps", "roles": [ "transcriber" ], "surname": "Jean-Baptiste" }, { "name": "Mariotti", "roles": [ "transcriber" ], "surname": "Viola" }, { "name": "Nolibois", "roles": [ "transcriber" ], "surname": "Alice" }, { "name": "Carnaille", "roles": [ "transcriber" ], "surname": "Camille" }, { "name": "Deleville", "roles": [ "transcriber" ], "surname": "Prunelle" }, { "name": "Lecomte", "roles": [ "transcriber" ], "surname": "Sophie" }, { "name": "Meylan", "roles": [ "transcriber" ], "surname": "Aminoel" }, { "name": "Ventura", "roles": [ "transcriber" ], "surname": "Simone" }, { "name": "Dugaz", "roles": [ "transcriber" ], "surname": "Lucien" } ], "characters": { "members": [ "e", "i", "s", "t", "n", "a", "r", "u", "o", "l", "c", "d", "m", "p", ".", "q", "f", "g", "\u0303", "z", "b", "h", "\u204a", "y", ":", "E", "x", "Q", "L", "S", "\ua751", "D", "\u033e", "\u0365", "C", "\ua76f", "\u0363", "A", "I", "M", "'", "\ua770", "\u0301", "T", "P", "O", "k", "N", "9", "U", "\u036c", "G", "R", "\u1dd1", "F", "\uf038", "\u0364", "&", "1", "B", "\ua753", "H", "\u0366", "\u1de4", "7", "2", "\u039b", "\u00f7", "\u0142", "6", "0", "3", "8", "4", "\u033d", "w", "-", "5", ",", "\u036d", "\u00b6", "Y", "\u1e9c", "\u00a0", "\u27e6", "\u27e7", "\u0368", "\u0308", "X", "\u0127", "K", "\u03b4", "/", "\u0167", "j" ], "mode": "NFD" }, "citation-file-link": "https://github.com/HTR-United/cremma-medieval/blob/main/citation.cff", "description": "Transcription corpora for training HTR models for medieval manuscripts from the 12th to the 15th century.", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "exact" }, "language": [ "fra", "fro" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "CremmaLab", "project-website": "https://cremmalab.hypotheses.org", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": 
"Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1499", "notBefore": "1100" }, "title": "Cremma Medieval", "transcription-guidelines": "As the data come from different projects, transcriptions have been standardized to strengthen HTR models. We chose a graphemic transcription method, following D. Stutzmann definitions (see bibliography), to have a sign in the image corresponding to a sign in our text: all the abbreviations are kept, and u/v or i/j are not distinguished. The spaces in the dataset are not homogeneously represented, sometimes transcriptions reproduce the manuscript spacing while others use lexical spaces. It must be stressed that spaces are the most important source of error in medieval HTR models. Most of the transcription follow the layout segmentation of the SegmOnto ontology (https://github.com/SegmOnto/examples), separating the main column, margin, numbering, drop capital, etc. All the recommendations are described in\n the following document : Ariane Pinche, Guide de transcription pour les manuscrits du Xe au XVe si\u00e8cle, 2022, \u27e8hal-03697382>, en ligne : .", "url": "https://github.com/HTR-United/cremma-medieval", "volume": [ { "count": 612134, "metric": "characters" }, { "count": 279, "metric": "files" }, { "count": 22913, "metric": "lines" }, { "count": 1889, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Pinche, Ariane},\ndoi = {10.5281/zenodo.5235185},\nmonth = {6},\ntitle = {Cremma Medieval},\nurl = {https://github.com/HTR-United/cremma-medieval},\nyear = {2022}\n}\n", "_apa": "Pinche A. (2022). Cremma Medieval (version Bicerin 1.1.0). 
DOI: 10.5281/zenodo.5235185 URL: https://github.com/HTR-United/cremma-medieval\n", "_pid": "632310da4" }, "f99aaacfc": { "authors": [ { "name": "Chagu\u00e9", "orcid": "0000-0002-0136-4434", "roles": [ "project-manager", "quality-control", "digitization", "support" ], "surname": "Alix" }, { "name": "Cl\u00e9rice", "orcid": "0000-0003-1852-9204", "roles": [ "project-manager", "quality-control" ], "surname": "Thibault" }, { "name": "Van Kote", "roles": [ "aligner", "transcriber" ], "surname": "Elsa" }, { "name": "Carrow", "roles": [ "aligner", "transcriber", "support" ], "surname": "Jennifer" }, { "name": "Wissam", "roles": [ "support" ], "surname": "Antoum" }, { "name": "Yann", "roles": [ "support" ], "surname": "Audin" }, { "name": "Anne", "roles": [ "support" ], "surname": "Baillot" }, { "name": "Marl\u00e8ne", "roles": [ "support" ], "surname": "Baron" }, { "name": "Alexandre", "roles": [ "support" ], "surname": "Bartz" }, { "name": "Rachel", "roles": [ "support" ], "surname": "Bawden" }, { "name": "Alice", "roles": [ "support" ], "surname": "Beaudry-Lagarde" }, { "name": "Rishika", "roles": [ "support" ], "surname": "Bhagwatkar" }, { "name": "Federico", "roles": [ "support" ], "surname": "Boschetti" }, { "name": "Camille", "roles": [ "support" ], "surname": "Bourgeois" }, { "name": "Alice", "roles": [ "support" ], "surname": "Brenon" }, { "name": "William", "roles": [ "support" ], "surname": "Brubacher" }, { "name": "Donovan", "roles": [ "support" ], "surname": "Brunot" }, { "name": "Roxanne", "roles": [ "support" ], "surname": "Brusseau" }, { "name": "Talitha", "roles": [ "support" ], "surname": "Bueno Mottes" }, { "name": "Zo\u00e9", "roles": [ "support" ], "surname": "Cappe" }, { "name": "Roman", "roles": [ "support" ], "surname": "Castagn\u00e9" }, { "name": "Galo", "roles": [ "support" ], "surname": "Castillo" }, { "name": "Brigitte", "roles": [ "support" ], "surname": "Chagu\u00e9" }, { "name": "Denis", "roles": [ "support" ], "surname": "Chagu\u00e9" }, { 
"name": "Emeric", "roles": [ "support" ], "surname": "Chagu\u00e9" }, { "name": "L\u00e9a", "roles": [ "support" ], "surname": "Charette" }, { "name": "Emmanuel", "roles": [ "support" ], "surname": "Chateau" }, { "name": "Jean-Baptiste", "roles": [ "support" ], "surname": "Chaudron" }, { "name": "Anna", "roles": [ "support" ], "surname": "Chepaikina" }, { "name": "Floriane", "roles": [ "support" ], "surname": "Chiffoleau" }, { "name": "Kelly", "roles": [ "support" ], "surname": "Christensen" }, { "name": "Federico", "roles": [ "support" ], "surname": "Cuartas Aristizabal" }, { "name": "Maria Laura", "roles": [ "support" ], "surname": "Cucciniello" }, { "name": "Aurore", "roles": [ "support" ], "surname": "Cu\u00e9llar" }, { "name": "Baudoin", "roles": [ "support" ], "surname": "Davoury" }, { "name": "Eric", "roles": [ "support" ], "surname": "de la Clergerie" }, { "name": "Roch", "roles": [ "support" ], "surname": "Delanney" }, { "name": "Camille", "roles": [ "support" ], "surname": "Delattre" }, { "name": "B\u00e9atrice", "roles": [ "support" ], "surname": "Denis" }, { "name": "Philippe", "roles": [ "support" ], "surname": "Deschamps" }, { "name": "Valentine", "roles": [ "support" ], "surname": "Desmorat" }, { "name": "Cindy", "roles": [ "support" ], "surname": "Dionisio" }, { "name": "Am\u00e9lie", "roles": [ "support" ], "surname": "Disant" }, { "name": "Elsa", "roles": [ "support" ], "surname": "Dufourg" }, { "name": "Jean-Luc", "roles": [ "support" ], "surname": "Falcone" }, { "name": "Margaux", "roles": [ "support" ], "surname": "Faure" }, { "name": "Glenda", "roles": [ "support" ], "surname": "Ferbeyre Rodriguez" }, { "name": "Giulia", "roles": [ "support" ], "surname": "Ferretti" }, { "name": "Fabien", "roles": [ "support" ], "surname": "Fizaine" }, { "name": "Jeanne", "roles": [ "support" ], "surname": "Flamant" }, { "name": "Cl\u00e9mence", "roles": [ "support" ], "surname": "Foisy-Marquis" }, { "name": "Anna", "roles": [ "support" ], "surname": 
"Fr\u00f6hlich" }, { "name": "Anne", "roles": [ "support" ], "surname": "Garcia Fernancez" }, { "name": "Vincent", "roles": [ "support" ], "surname": "Giovannangeli" }, { "name": "Gabrielle", "roles": [ "support" ], "surname": "Grondin" }, { "name": "Morgane", "roles": [ "support" ], "surname": "Guichard" }, { "name": "Jessica", "roles": [ "support" ], "surname": "Guiraud" }, { "name": "Anahi", "roles": [ "support" ], "surname": "Haedo" }, { "name": "Pauline", "roles": [ "support" ], "surname": "Hennequart" }, { "name": "Yanet", "roles": [ "support" ], "surname": "Hernandez Pedroza" }, { "name": "Lucence", "roles": [ "support" ], "surname": "Ing" }, { "name": "Pauline", "roles": [ "support" ], "surname": "Jacsont" }, { "name": "Juliette", "roles": [ "support" ], "surname": "Janes" }, { "name": "Corinne", "roles": [ "support" ], "surname": "Jeanne" }, { "name": "Arilys", "roles": [ "support" ], "surname": "Jia" }, { "name": "Vincent", "roles": [ "support" ], "surname": "Jolivet" }, { "name": "Katrina", "roles": [ "support" ], "surname": "Kaustina" }, { "name": "Ben", "roles": [ "support" ], "surname": "Kiessling" }, { "name": "Ozcar", "roles": [ "support" ], "surname": "Koc" }, { "name": "Lena", "roles": [ "support" ], "surname": "Krause" }, { "name": "Gabriel", "roles": [ "support" ], "surname": "Labrie" }, { "name": "Am\u00e9lie", "roles": [ "support" ], "surname": "Lapointe" }, { "name": "David", "roles": [ "support" ], "surname": "Lassner" }, { "name": "Emmanuelle", "roles": [ "support" ], "surname": "Lescouet" }, { "name": "Danny", "roles": [ "support" ], "surname": "L\u00e9tourneau" }, { "name": "Marie-Fran\u00e7oise", "roles": [ "support" ], "surname": "Limon-Bonnet" }, { "name": "Gabrielle", "roles": [ "support" ], "surname": "Lodi" }, { "name": "Victoria", "roles": [ "support" ], "surname": "Lupascu" }, { "name": "Elsa", "roles": [ "support" ], "surname": "Marguin-Hamon" }, { "name": "Orestis", "roles": [ "support" ], "surname": "Marinamis" }, { "name": 
"Gina", "roles": [ "support" ], "surname": "Mars" }, { "name": "Eug\u00e9nie", "roles": [ "support" ], "surname": "Matthey-Jonais" }, { "name": "Dilson", "roles": [ "support" ], "surname": "Mayunga" }, { "name": "Margot", "roles": [ "support" ], "surname": "Mellet" }, { "name": "Matt", "roles": [ "support" ], "surname": "Moskal" }, { "name": "Shannon", "roles": [ "support" ], "surname": "Moskal" }, { "name": "Zo\u00e9", "roles": [ "support" ], "surname": "Mozin" }, { "name": "Lydia", "orcid": "0009-0009-7082-4711", "roles": [ "support" ], "surname": "Nishimwe" }, { "name": "Jade", "roles": [ "support" ], "surname": "Norindr" }, { "name": "Jules", "roles": [ "support" ], "surname": "Nuguet" }, { "name": "Sarah", "roles": [ "support" ], "surname": "Orsini" }, { "name": "Pedro", "roles": [ "support" ], "surname": "Ortiz Suarez" }, { "name": "Kenan", "roles": [ "support" ], "surname": "Oudin" }, { "name": "Gabrielle", "roles": [ "support" ], "surname": "Pannetier-Leboeuf" }, { "name": "Thierry", "roles": [ "support" ], "surname": "Paquet" }, { "name": "Thomas", "roles": [ "support" ], "surname": "Parisot" }, { "name": "Elodie", "roles": [ "support" ], "surname": "Paupe" }, { "name": "Ga\u00ebl", "roles": [ "support" ], "surname": "Poux" }, { "name": "Montaine", "roles": [ "support" ], "surname": "Proph\u00eate" }, { "name": "Alix", "roles": [ "support" ], "surname": "Raoux" }, { "name": "Ga\u00ebtan", "roles": [ "support" ], "surname": "Raoux" }, { "name": "Elise", "roles": [ "support" ], "surname": "Razafindrakoto" }, { "name": "Camille", "roles": [ "support" ], "surname": "Rey" }, { "name": "Arij", "roles": [ "support" ], "surname": "Riabi" }, { "name": "Karen", "roles": [ "support" ], "surname": "Ross" }, { "name": "Manon", "roles": [ "support" ], "surname": "Rouill\u00e9" }, { "name": "Louise", "roles": [ "support" ], "surname": "Ruby" }, { "name": "Beno\u00eet", "roles": [ "support" ], "surname": "Sagot" }, { "name": "Hugo", "roles": [ "support" ], "surname": 
"Scheithauer" }, { "name": "Anne-Val\u00e9rie", "roles": [ "support" ], "surname": "Schweyer" }, { "name": "Djam\u00e9", "roles": [ "support" ], "surname": "Seddah" }, { "name": "Paula", "roles": [ "support" ], "surname": "Seidel" }, { "name": "Peter", "roles": [ "support" ], "surname": "Stokes" }, { "name": "Yves", "roles": [ "support" ], "surname": "Tadjo" }, { "name": "Lionel", "roles": [ "support" ], "surname": "Tadjou" }, { "name": "Kristin", "roles": [ "support" ], "surname": "Tanton" }, { "name": "Marie", "roles": [ "support" ], "surname": "Tariol" }, { "name": "Rian", "roles": [ "support" ], "surname": "Touchent" }, { "name": "Anne-Kim", "roles": [ "support" ], "surname": "Tremblay" }, { "name": "Pierre", "roles": [ "support" ], "surname": "Vauterin" }, { "name": "Mathilde", "roles": [ "support" ], "surname": "Verstraete" }, { "name": "Magalie", "roles": [ "support" ], "surname": "Vetter" }, { "name": "Marcello", "roles": [ "support" ], "surname": "Vitali Rosati" }, { "name": "Malamatenia", "roles": [ "support" ], "surname": "Vlachou-Estathiou" }, { "name": "Rosanne", "roles": [ "support" ], "surname": "Wingert" }, { "name": "D\u00e9bora", "roles": [ "support" ], "surname": "Yi" }, { "name": "Antoine\"", "roles": [ "support" ], "surname": "" }, { "name": "Camille", "roles": [ "support" ], "surname": "" }, { "name": "Manon", "roles": [ "support" ], "surname": "" }, { "name": "Yohan", "roles": [ "support" ], "surname": "" } ], "characters": { "members": [ "e", "a", "n", "i", "s", "t", "r", "l", "o", "u", "d", "c", "m", "p", "\u0301", ",", "g", "h", "v", "f", ".", "b", "\u0300", "'", "q", "1", "L", "y", "0", "C", "9", "E", "S", "2", "-", "A", "(", ")", "I", "x", "k", "M", "P", "R", "j", "B", "8", "T", "N", "D", "\u0302", "6", "4", "O", "G", "3", "5", "7", "F", "H", "U", "w", "V", "=", "z", "\u0327", "J", ":", "\u0308", "W", "K", ">", "<", "\"", "\u00ab", "\u00bb", "Y", "X", "[", "]", "^", "/", "\u017f", "\u0304", ";", "Q", "Z", "\u0153", "\u030c", "!", 
"\u2019", "\u00f8", "\u0303", "%", "&", "\u2013", "\u025b", "\u030a", "\u00b0", "\u00df", "\u0279", "\u2014", "\u00c6", "\u00b2", "\u0306", "\u1455", "#", "\u0259", "\u20ac", "\u2026", "\u0142", "\u00a0", "\u0251", "\u0254", "\u0281" ], "mode": "NFD" }, "description": "The CREMMA-WIKIPEDIA project aims at creating a collection of ground truth to train HTR models on contemporary French handwriting.\n\nEach image represents an exerpt from a randomly selected Wikipedia page, copied by hand by volunteers. We then took care of the alignment between the handwritten portion and the original text, also present on the image.", "format": "Alto-XML", "hands": { "count": "1-per-file", "precision": "estimated" }, "institutions": [ { "name": "6e-1 du Coll\u00e8ge Martin-Luther-King de Charvieu-Chavagneux", "roles": [ "support" ] } ], "language": [ "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "CREMMA", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "2023", "notBefore": "2022" }, "title": "CREMMA WIKIPEDIA", "transcription-guidelines": "The transcription guidelines follow CREMMA's convention (https://gist.github.com/alix-tz/6f89444521bf1cab0522da520f7e4ff4). In short: superscript is preceded by a ^. Strikethrough elements are transcribed with \"><\" when unreadable, \">word<\" when readeable. The text to copy may have included phonetic transcription. Non-french letters and diacritics were rendered as well. 
", "url": "https://github.com/HTR-United/cremma-wikipedia", "volume": [ { "count": 99680, "metric": "characters" }, { "count": 350, "metric": "files" }, { "count": 1971, "metric": "lines" }, { "count": 351, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Chagu\u00e9, Alix and Cl\u00e9rice, Thibault and Van Kote, Elsa and Carrow, Jennifer and Antoum, Wissam and Audin, Yann and Baillot, Anne and Baron, Marl\u00e8ne and Bartz, Alexandre and Bawden, Rachel and Beaudry-Lagarde, Alice and Bhagwatkar, Rishika and Boschetti, Federico and Bourgeois, Camille and Brenon, Alice and Brubacher, William and Brunot, Donovan and Brusseau, Roxanne and Bueno Mottes, Talitha and Cappe, Zo\u00e9 and Castagn\u00e9, Roman and Castillo, Galo and Chagu\u00e9, Brigitte and Chagu\u00e9, Denis and Chagu\u00e9, Emeric and Charette, L\u00e9a and Chateau, Emmanuel and Chaudron, Jean-Baptiste and Chepaikina, Anna and Chiffoleau, Floriane and Christensen, Kelly and Cuartas Aristizabal, Federico and Cucciniello, Maria Laura and Cu\u00e9llar, Aurore and Davoury, Baudoin and de la Clergerie, Eric and Delanney, Roch and Delattre, Camille and Denis, B\u00e9atrice and Deschamps, Philippe and Desmorat, Valentine and Dionisio, Cindy and Disant, Am\u00e9lie and Dufourg, Elsa and Falcone, Jean-Luc and Faure, Margaux and Ferbeyre Rodriguez, Glenda and Ferretti, Giulia and Fizaine, Fabien and Flamant, Jeanne and Foisy-Marquis, Cl\u00e9mence and Fr\u00f6hlich, Anna and Garcia Fernancez, Anne and Giovannangeli, Vincent and Grondin, Gabrielle and Guichard, Morgane and Guiraud, Jessica and Haedo, Anahi and Hennequart, Pauline and Hernandez Pedroza, Yanet and Ing, Lucence and Jacsont, Pauline and Janes, Juliette and Jeanne, Corinne and Jia, Arilys and Jolivet, Vincent and Kaustina, Katrina and Kiessling, Ben and Koc, Ozcar and Krause, Lena and Labrie, Gabriel and Lapointe, Am\u00e9lie and Lassner, David and Lescouet, Emmanuelle and L\u00e9tourneau, Danny and 
Limon-Bonnet, Marie-Fran\u00e7oise and Lodi, Gabrielle and Lupascu, Victoria and Marguin-Hamon, Elsa and Marinamis, Orestis and Mars, Gina and Matthey-Jonais, Eug\u00e9nie and Mayunga, Dilson and Mellet, Margot and Moskal, Matt and Moskal, Shannon and Mozin, Zo\u00e9 and Nishimwe, Lydia and Norindr, Jade and Nuguet, Jules and Orsini, Sarah and Ortiz Suarez, Pedro and Oudin, Kenan and Pannetier-Leboeuf, Gabrielle and Paquet, Thierry and Parisot, Thomas and Paupe, Elodie and Poux, Ga\u00ebl and Proph\u00eate, Montaine and Raoux, Alix and Raoux, Ga\u00ebtan and Razafindrakoto, Elise and Rey, Camille and Riabi, Arij and Ross, Karen and Rouill\u00e9, Manon and Ruby, Louise and Sagot, Beno\u00eet and Scheithauer, Hugo and Schweyer, Anne-Val\u00e9rie and Seddah, Djam\u00e9 and Seidel, Paula and Stokes, Peter and Tadjo, Yves and Tadjou, Lionel and Tanton, Kristin and Tariol, Marie and Touchent, Rian and Tremblay, Anne-Kim and Vauterin, Pierre and Verstraete, Mathilde and Vetter, Magalie and Vitali Rosati, Marcello and Vlachou-Estathiou, Malamatenia and Wingert, Rosanne and Yi, D\u00e9bora and other anonymous contributers},\ndoi = {10.5281/zenodo.7782065},\nmonth = {3},\ntitle = {CREMMA WIKIPEDIA},\nurl = {https://github.com/HTR-United/cremma-wikipedia},\nyear = {2023}\n}\n", "_apa": "Chagu\u00e9 A., Cl\u00e9rice T., Van Kote E., Carrow J., Antoum W., Audin Y., Baillot A., Baron M., Bartz A., Bawden R., Beaudry-Lagarde A., Bhagwatkar R., Boschetti F., Bourgeois C., Brenon A., Brubacher W., Brunot D., Brusseau R., Bueno Mottes T., Cappe Z., Castagn\u00e9 R., Castillo G., Chagu\u00e9 B., Chagu\u00e9 D., Chagu\u00e9 E., Charette L., Chateau E., Chaudron J., Chepaikina A., Chiffoleau F., Christensen K., Cuartas Aristizabal F., Cucciniello M.L., Cu\u00e9llar A., Davoury B., de la Clergerie E., Delanney R., Delattre C., Denis B., Deschamps P., Desmorat V., Dionisio C., Disant A., Dufourg E., Falcone J., Faure M., Ferbeyre Rodriguez G., Ferretti G., Fizaine F., Flamant J., 
Foisy-Marquis C., Fr\u00f6hlich A., Garcia Fernancez A., Giovannangeli V., Grondin G., Guichard M., Guiraud J., Haedo A., Hennequart P., Hernandez Pedroza Y., Ing L., Jacsont P., Janes J., Jeanne C., Jia A., Jolivet V., Kaustina K., Kiessling B., Koc O., Krause L., Labrie G., Lapointe A., Lassner D., Lescouet E., L\u00e9tourneau D., Limon-Bonnet M., Lodi G., Lupascu V., Marguin-Hamon E., Marinamis O., Mars G., Matthey-Jonais E., Mayunga D., Mellet M., Moskal M., Moskal S., Mozin Z., Nishimwe L., Norindr J., Nuguet J., Orsini S., Ortiz Suarez P., Oudin K., Pannetier-Leboeuf G., Paquet T., Parisot T., Paupe E., Poux G., Proph\u00eate M., Raoux A., Raoux G., Razafindrakoto E., Rey C., Riabi A., Ross K., Rouill\u00e9 M., Ruby L., Sagot B., Scheithauer H., Schweyer A., Seddah D., Seidel P., Stokes P., Tadjo Y., Tadjou L., Tanton K., Tariol M., Touchent R., Tremblay A., Vauterin P., Verstraete M., Vetter M., Vitali Rosati M., Vlachou-Estathiou M., Wingert R., Yi D., other anonymous contributers (2023). CREMMA WIKIPEDIA (version 1.0.3). 
DOI: 10.5281/zenodo.7782065 URL: https://github.com/HTR-United/cremma-wikipedia\n", "_pid": "f99aaacfc" }, "bde559631": { "authors": [ { "name": "Chiffoleau", "roles": [ "project-manager", "aligner" ], "surname": "Floriane" } ], "characters": { "members": [ "e", "s", "a", "n", "r", "i", "t", "u", "o", "l", "d", "c", "m", "p", "\u0301", ",", "v", ".", "f", "q", "g", "\u0300", "-", "E", "b", "\u2019", "'", "h", "A", "L", "N", "x", "j", "S", "R", "I", "T", "M", "\u0302", "C", "P", "y", "O", ";", "1", "\u00a3", "U", "D", "B", "F", "J", "G", "\"", "0", "z", "V", "9", "2", ":", "X", "\u00a0", "\u20ac", "H", "5", "!", "3", "4", "\u0327", "\u00b0", "W", "Y", "6", "8", "?", "7", "K", "Q", "/", "(", ")", "k", "\u0153", "w", "\u0308", "\u2026", "Z", "\u2013", "&", "%", "=", "$", "_" ], "mode": "NFD" }, "description": "OCR ground truth dataset based on French 20th-century typewritten letters", "format": "Alto-XML", "hands": { "count": "less-than-11", "precision": "exact" }, "language": [ "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "DAHN", "project-website": "https://digitalintellectuals.hypotheses.org/category/dahn", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1924", "notBefore": "1914" }, "title": "DAHN Corpus", "url": "https://github.com/HTR-United/dahncorpus", "volume": [ { "count": 475849, "metric": "characters" }, { "count": 547, "metric": "files" }, { "count": 12539, "metric": "lines" }, { "count": 527, "metric": "pages" }, { "count": 547, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Chiffoleau, Floriane},\ndoi = {10.5281/zenodo.5911868},\nmonth = {3},\ntitle = {dahncorpus},\nurl = {https://github.com/HTR-United/dahncorpus},\nyear = {2021}\n}\n", "_apa": "Chiffoleau F. (2021). 
dahncorpus (version 1.0.0). DOI: 10.5281/zenodo.5911868 URL: https://github.com/HTR-United/dahncorpus\n", "_pid": "bde559631" }, "507bb514d": { "authors": [ { "name": "Limon-Bonnet", "roles": [ "transcriber", "aligner", "quality-control" ], "surname": "Fran\u00e7oise" }, { "name": "Chagu\u00e9", "roles": [ "support", "project-manager", "quality-control" ], "surname": "Alix" }, { "name": "Rostaing", "roles": [ "project-manager" ], "surname": "Aur\u00e9lia" } ], "characters": { "members": [ "e", "t", "a", "/", "0", "c", "n", "r", "m", "h", "p", "s", "o", "g", "5", "7", "1", "E", ".", "i", "-", "3", "9", "2", "f", "d", "8", "<", "l", "{", ":", "P", "A", "G", "}", "U", "x", ">", "b", "4", "6" ], "mode": "NFD" }, "citation-file-link": "https://raw.githubusercontent.com/HTR-United/lectaurep-bronod/master/CITATION.cff", "description": "Ground truth for Ma\u00eetre Bronod\u2019s registers, notary in Paris during the 18th century.\n", "format": "Page-XML", "hands": { "count": "1", "precision": "exact" }, "language": [ "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "LECTAUREP", "project-website": "https://lectaurep.hypotheses.org/", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "sources": [ { "link": "", "reference": "Limon-Bonnet, M. (2021). Lectaurep-Bronod, ground truth for Maitre Bronod's documents (French XVIIIth century) (Version 1.0) [Computer software]. https://doi.org/10.5072/zenodo.977735" } ], "time": { "notAfter": "1745", "notBefore": "1742" }, "title": "Notaires de Paris - Bronod", "transcription-guidelines": "Transcription fid\u00e8le aux manuscrits : la casse et les abr\u00e9viations sont respect\u00e9es. Les portions de texte suscrites sont pr\u00e9c\u00e9d\u00e9es d'un symbole `^`. 
Pas de traitement particulier des \u00e9ventuels s longs.\n", "url": "https://github.com/HTR-United/lectaurep-bronod", "volume": [ { "count": 359094, "metric": "characters" }, { "count": 100, "metric": "files" }, { "count": 3702, "metric": "lines" }, { "count": 200, "metric": "pages" }, { "count": 296, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Limon-Bonnet, Marie-Fran\u00e7oise and Chagu\u00e9, Alix and Rostaing, Aur\u00e9lia},\ndoi = {10.5281/zenodo.10631355},\nmonth = {2},\ntitle = {Lectaurep-Bronod, ground truth for Maitre Bronod's documents (French XVIIIth century)},\nurl = {https://lectaurep.hypotheses.org/},\nyear = {2024}\n}\n", "_apa": "Limon-Bonnet M., Chagu\u00e9 A., Rostaing A. (2024). Lectaurep-Bronod, ground truth for Maitre Bronod's documents (French XVIIIth century) DOI: 10.5281/zenodo.10631355 URL: https://lectaurep.hypotheses.org/\n", "_pid": "507bb514d" }, "a6e899c14": { "authors": [ { "name": "Denis", "roles": [ "transcriber", "aligner" ], "surname": "Nathalie" }, { "name": "Rostaing", "roles": [ "project-manager", "quality-control", "support" ], "surname": "Aur\u00e9lia" }, { "name": "Chagu\u00e9", "roles": [ "project-manager", "quality-control", "support" ], "surname": "Alix" } ], "characters": { "members": [ "e", "t", "/", "a", "c", "0", "n", "r", "m", "h", "p", "s", "o", "g", "1", "7", "2", "E", ".", "i", "-", "f", "9", "d", "8", "5", "<", "l", "{", ":", "P", "A", "G", "}", "U", "x", ">", "b", "4", "6", "3" ], "mode": "NFD" }, "citation-file-link": "https://raw.githubusercontent.com/HTR-United/lectaurep-mariages-et-divorces/main/CITATION.cff", "description": "Ground truth for the Registres des Contrats de Mariages et des S\u00e9parations et Divorces in Paris. The documents are written in French during the 19th century and contain many names and addresses. The information is organized in tables spread across two pages. 
The table\u2019s headers and the preamble are printed.\n", "format": "Page-XML", "hands": { "count": "more-than-10", "precision": "estimated" }, "language": [ "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "LECTAUREP", "project-website": "https://lectaurep.hypotheses.org/", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "mainly-manuscript", "sources": [ { "link": "", "reference": "Rostaing, A., Denis, N., & Chagu\u00e9, A. (2021). Lectaurep-Mariages-et-Divorces: ground truth for the Enregistrements des Contrats de Mariages et des S\u00e9parations et Divorces in Paris (French 19th century) (Version 1.0) [Computer software]. https://doi.org/10.5072/zenodo.977697" } ], "time": { "notAfter": "1928", "notBefore": "1829" }, "title": "Notaires de Paris - Mariages et Divorces", "transcription-guidelines": "The transcription respects what is written (abbreviations are not expanded, capitalization follows 19th century practices). Superscripted portions of text are signaled by `^` and many signatures are transcribed with \u00a5. The lines containing printed text are associated with the type `printed` and the signatures are associated with the type `signature`. 
Thus they can both be removed from the dataset if necessary.\n", "url": "https://github.com/HTR-United/lectaurep-mariages-et-divorces", "volume": [ { "count": 1969488, "metric": "characters" }, { "count": 104, "metric": "files" }, { "count": 20304, "metric": "lines" }, { "count": 105, "metric": "pages" }, { "count": 324, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Rostaing, Aur\u00e9lia and Denis, Nathalie and Chagu\u00e9, Alix},\ndoi = {10.5281/zenodo.10632593},\nmonth = {2},\ntitle = {Lectaurep-Mariages-et-Divorces: ground truth for the Enregistrements des Contrats de Mariages et des S\u00e9parations et Divorces in Paris (French 19th century) },\nurl = {https://github.com/HTR-United/lectaurep-mariages-et-divorces},\nyear = {2024}\n}\n", "_apa": "Rostaing A., Denis N., Chagu\u00e9 A. (2024). Lectaurep-Mariages-et-Divorces: ground truth for the Enregistrements des Contrats de Mariages et des S\u00e9parations et Divorces in Paris (French 19th century) (version 2.0). 
DOI: 10.5281/zenodo.10632593 URL: https://github.com/HTR-United/lectaurep-mariages-et-divorces\n", "_pid": "a6e899c14" }, "a8b397410": { "authors": [ { "name": "Durand", "roles": [ "transcriber", "aligner" ], "surname": "Marc" }, { "name": "Rostaing", "roles": [ "transcriber", "project-manager", "quality-control" ], "surname": "Aur\u00e9lia" }, { "name": "Chagu\u00e9", "roles": [ "project-manager", "quality-control", "support" ], "surname": "Alix" } ], "characters": { "members": [ "e", "r", "a", "i", "n", "t", "o", "u", "s", "d", "l", "c", "p", "1", "m", "S", "\u0300", ",", "E", "\u0301", "2", "P", ".", "M", "0", "A", "C", "5", "3", "h", "T", "v", "g", "D", "7", ")", "(", "R", "N", "f", "I", "b", "L", "8", "9", "^", "4", "6", "B", "O", "J", "V", "y", "'", "G", "F", "-", "x", "q", "\u00b0", "H", "\u0302", "U", "\"", "X", "&", "z", ";", "\u0327", ":", "j", "+", "Q", "|", "\u0308", "/", "k", "=", "%", "W", "K", "Y", "Z", "w", "~", "\u00a5", "\u023c", "_", "\u20ac", "`", "[", "]", "\u0153", "?", "*", "\u0303", ">", "\u00bd" ], "mode": "NFD" }, "citation-file-link": "https://github.com/HTR-United/lectaurep-repertoires/raw/main/CITATION.cff", "description": "Ground truth for various Parisian registries of notary deeds written in French during the 19th century. 
The information is organized following pre-printed tables (with printed headers) and contain many names, addresses, numbers and abbreviations.", "format": "Alto-XML", "hands": { "count": "more-than-10", "precision": "estimated" }, "language": [ "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "LECTAUREP", "project-website": "https://lectaurep.hypotheses.org/", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "mainly-manuscript", "time": { "notAfter": "1939", "notBefore": "1830" }, "title": "Notaires de Paris - R\u00e9pertoires", "url": "https://github.com/HTR-United/lectaurep-repertoires", "volume": [ { "count": 525786, "metric": "characters" }, { "count": 218, "metric": "files" }, { "count": 29410, "metric": "lines" }, { "count": 1181, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {LECTAUREP and Rostaing, Aur\u00e9lia and Durand, Marc and Chagu\u00e9, Alix},\ndoi = {10.5072/zenodo.977691},\nmonth = {12},\ntitle = {Notaires de Paris - R\u00e9pertoires, ground truth for various Parisian registries of notary deeds (French 19th and 20th centuries)},\nurl = {https://github.com/HTR-United/lectaurep-repertoires},\nyear = {2021}\n}\n", "_apa": "LECTAUREP, Rostaing A., Durand M., Chagu\u00e9 A. (2021). Notaires de Paris - R\u00e9pertoires, ground truth for various Parisian registries of notary deeds (French 19th and 20th centuries) (version 2.0.0). 
DOI: 10.5072/zenodo.977691 URL: https://github.com/HTR-United/lectaurep-repertoires\n", "_pid": "a8b397410" }, "c5d771d11": { "authors": [ { "name": "Chagu\u00e9", "roles": [ "transcriber", "project-manager" ], "surname": "Alix" } ], "characters": { "members": [ "e", "a", "s", "n", "t", "r", "i", "u", "o", "l", "d", "c", "m", "p", "\u0301", ".", "~", "v", ",", "'", "-", "f", "g", "h", "q", "b", "\u0300", "_", "E", "L", "A", "I", "C", "x", "S", "M", "j", "T", "\u0302", "R", "N", "1", "O", "P", "y", "\"", "U", "J", "D", "2", ":", ")", "(", "B", "0", "5", "3", "4", "z", "6", "F", "H", "Q", "!", "9", "G", "7", "V", "8", "?", "\u27e6", "\u27e7", "\u0327", "Y", ";", "\u2019", "\u00b0", "k", "X", "\u0308", "+", "=", "W", "/", "K", "^", "w", "Z", "%", "*" ], "mode": "NFD" }, "citation-file-link": "https://github.com/HTR-United/tapuscorpus/raw/main/citation.cff", "description": "Ground truth based on a variety of French typewritten documents from the 20th century. Contains exerpts plays, poems, letters and administrative reports.", "format": "Page-XML", "hands": { "count": "1-per-folder", "precision": "exact" }, "language": [ "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "HTR-United\n", "project-website": "https://htr-united.github.io/", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "sources": [ { "link": "", "reference": "Chagu\u00e9, A. (2021). Tapuscorpus (Version 1.0) [Computer software]. 
https://doi.org/10.5072/zenodo.977649" } ], "time": { "notAfter": "1999", "notBefore": "1900" }, "title": "Tapus Corpus", "transcription-guidelines": "See README in repository.", "url": "https://github.com/HTR-United/tapuscorpus", "volume": [ { "count": 131511, "metric": "characters" }, { "count": 151, "metric": "files" }, { "count": 4376, "metric": "lines" }, { "count": 150, "metric": "pages" }, { "count": 375, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Chagu\u00e9, Alix},\ndoi = {10.5072/zenodo.977649},\nmonth = {12},\ntitle = {Tapuscorpus},\nurl = {https://github.com/HTR-United/tapuscorpus},\nyear = {2021}\n}\n", "_apa": "Chagu\u00e9 A. (2021). Tapuscorpus (version 1.0). DOI: 10.5072/zenodo.977649 URL: https://github.com/HTR-United/tapuscorpus Time Us Corpus DOI: 10.5281/zenodo.6230755 URL: https://github.com/HTR-United/timeuscorpus Manuscripts vary in themes, period, etc. Most manuscript have at most 10 columns transcribed.", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "estimated" }, "institutions": [], "language": [ "fro" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "HTRomance", "project-website": "https://htromance-project.github.io", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1499", "notBefore": "1200" }, "title": "HTRomance, Medieval French corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation", "transcription-guidelines": "\nThe transcription guidelines are described in a paper available on [HAL](https://hal-enc.archives-ouvertes.fr/hal-03828353) and published at the Journal for Open Humanities Data. 
It provides specific details about the selection process, the transcription methods and choices, as well as details about output (mainly the [Generic CREMMA Model for Medieval Manuscripts (Latin and Old French)](https://zenodo.org/record/7234166#.Y7f69afMJhE) for [Kraken](https://kraken.re))", "url": "https://github.com/HTRomance-Project/medieval-french", "volume": [ { "count": 300070, "metric": "characters" }, { "count": 138, "metric": "files" }, { "count": 10385, "metric": "lines" }, { "count": 789, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Leroy, No\u00e9 and Pinche, Ariane and Camps, Jean-Baptiste and Cl\u00e9rice, Thibault and Chagu\u00e9, Alix},\ntitle = {HTRomance, Medieval French corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation},\nurl = {https://github.com/HTRomance-Project/middle-ages-in-spain}\n}\n", "_apa": "Leroy N., Pinche A., Camps J., Cl\u00e9rice T., Chagu\u00e9 A. HTRomance, Medieval French corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation URL: https://github.com/HTRomance-Project/middle-ages-in-spain HTRomance, Medieval Italian corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation DOI: 10.5281/zenodo.8256728 URL: https://github.com/HTRomance-Project/medieval-italian Manuscripts vary in themes, period, etc. 
Most manuscript have at most 10 columns transcribed.", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "estimated" }, "institutions": [], "language": [ "lat" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "HTRomance", "project-website": "https://htromance-project.github.io", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1499", "notBefore": "1100" }, "title": "HTRomance, Medieval Latin corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation", "transcription-guidelines": "\nThe transcription guidelines are described in a paper available on [HAL](https://hal-enc.archives-ouvertes.fr/hal-03828353) and published at the Journal for Open Humanities Data. It provides specific details about the selection process, the transcription methods and choices, as well as details about output (mainly the [Generic CREMMA Model for Medieval Manuscripts (Latin and Old French)](https://zenodo.org/record/7234166#.Y7f69afMJhE) for [Kraken](https://kraken.re))", "url": "https://github.com/HTRomance-Project/medieval-latin", "volume": [ { "count": 299062, "metric": "characters" }, { "count": 142, "metric": "files" }, { "count": 8879, "metric": "lines" }, { "count": 749, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Glaise, Anthony and Cl\u00e9rice, Thibault and Boschetti, Federico and Fischer, Franz and Chagu\u00e9, Alix},\ntitle = {HTRomance, Medieval Latin corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation},\nurl = {https://github.com/HTRomance-Project/medieval-latin}\n}\n", "_apa": "Glaise A., Cl\u00e9rice T., Boschetti F., Fischer F., Chagu\u00e9 A. 
HTRomance, Medieval Latin corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation URL: https://github.com/HTRomance-Project/medieval-latin Manuscripts vary in themes, period, etc. Most manuscripts have at most 10 columns transcribed.", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "estimated" }, "institutions": [], "language": [ "lat" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "HTRomance", "project-website": "https://htromance-project.github.io", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1499", "notBefore": "1100" }, "title": "HTRomance, Medieval Spain corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation", "transcription-guidelines": "\nThe transcription guidelines are described in a paper available on [HAL](https://hal-enc.archives-ouvertes.fr/hal-03828353) and published at the Journal for Open Humanities Data. 
It provides specific details about the selection process, the transcription methods and choices, as well as details about output (mainly the [Generic CREMMA Model for Medieval Manuscripts (Latin and Old French)](https://zenodo.org/record/7234166#.Y7f69afMJhE) for [Kraken](https://kraken.re))", "url": "https://github.com/HTRomance-Project/middle-ages-in-spain", "volume": [ { "count": 160876, "metric": "characters" }, { "count": 86, "metric": "files" }, { "count": 4437, "metric": "lines" }, { "count": 395, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Bordier, Julie and Gille Levenson, Matthias and Brisville-Fertin, Olivier and Cl\u00e9rice, Thibault and Chagu\u00e9, Alix},\ntitle = {HTRomance, Medieval Spain corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation},\nurl = {https://github.com/HTRomance-Project/middle-ages-in-spain}\n}\n", "_apa": "Bordier J., Gille Levenson M., Brisville-Fertin O., Cl\u00e9rice T., Chagu\u00e9 A. HTRomance, Medieval Spain corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation URL: https://github.com/HTRomance-Project/middle-ages-in-spain It provides specific details about the selection process, the transcription methods and choices, as well as details about output (mainly the Generic CREMMA Model for Medieval Manuscripts (Latin and Old French) for Kraken)", "url": "https://github.com/HTRomance-Project/modern-roman-languages", "volume": [ { "count": 114094, "metric": "characters" }, { "count": 168, "metric": "files" }, { "count": 3386, "metric": "lines" }, { "count": 441, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Norindr, Jade and Mikhalchuk, Anna and Cl\u00e9rice, Thibault and Chagu\u00e9, Alix},\ntitle = {HTRomance, Modern language corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation},\nurl = {https://github.com/HTRomance-Project/modern-roman-languages}\n}\n", "_apa": "Norindr J., Mikhalchuk A., 
Cl\u00e9rice T., Chagu\u00e9 A. HTRomance, Modern language corpus of ground-truth for Handwritten Text Recognition and Layout Segmentation URL: https://github.com/HTRomance-Project/modern-roman-languages The data is based on transcription data stored in the German Text Archive (DTA) (https://www.deutschestextarchiv.de/).", "project-name": "OCR-D", "project-website": "https://ocr-d.de/", "language": [ "eng", "fra", "deu", "heb", "lat" ], "production-software": "Aletheia", "automatically-aligned": false, "script": [ { "iso": "Latn" }, { "iso": "Goth" } ], "script-type": "only-typed", "time": { "notAfter": "1900", "notBefore": "1500" }, "hands": { "count": "less-than-11", "precision": "exact" }, "license": { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" }, "format": "Page-XML", "volume": [ { "count": 640976, "metric": "characters" }, { "count": 217, "metric": "files" }, { "count": 6608, "metric": "lines" }, { "count": 1647, "metric": "regions" } ], "citation-file-link": "https://raw.githubusercontent.com/OCR-D/gt_structure_text/main/CITATION.cff", "transcription-guidelines": "OCR-D Ground Truth Guidelines https://ocr-d.de/en/gt-guidelines/trans/", "_bibtex": "@misc{YourReferenceHere,\nauthor = {Boenig, Matthias},\nmonth = {7},\ntitle = {gt_structure_text},\nurl = {https://github.com/OCR-D/gt_structure_text},\nyear = {2024}\n}\n", "_apa": "Boenig M. (2024). gt_structure_text (version 68_v1.5.0). URL: https://github.com/OCR-D/gt_structure_text Thibault Cl\u00e9rice), of the first semester - Master Humanit\u00e9s Num\u00e9riques ENC-PSL 2021-2022. At the same time it and constitutes part of the biannual project \"Per un\u2019edizione digitale della Genealogia deorum gentilium\" di Boccaccio\" (dir. F. Duval, M. Maulu). 
Financed in 2021, this project foresees to put on line in XML format the unpublished translation in Middle French entitled \"De la genealogie des dieux\".\n", "language": [ "frm", "lat" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1472", "notAfter": "1498" }, "hands": { "count": "1-per-folder", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 109409 }, { "metric": "files", "count": 47 }, { "metric": "lines", "count": 3656 }, { "metric": "pages", "count": 52 }, { "metric": "regions", "count": 292 } ], "sources": [ { "reference": "Laurent Premierfait, Boccace (1498), \"De la genealogie des dieux\", Paris, A. V\u00e9rard.", "link": "https://gallica.bnf.fr/ark:/12148/bpt6k105063r?rk=21459;2 " } ], "citation-file-link": "https://raw.githubusercontent.com/PSL-Chartes-HTR-Students/HN2021-Boccace/main/CITATION.cff", "transcription-guidelines": "No development of abbreviations. Special characters are used for the graphemic transcription, compatible with the Unicode MUFI and the special character table of cremma-medieval. No correction of orthography errors, BUT proper transcription of inversed letters (for Inc59) such as character \"n\" printed as \"u\" in several cases. Spaces were added freely for word separation according to dictionaries of middle French and Latin (latin forms verified on Collatinus). For more documentation regarding the transcription norms and guidelines head to the repository and the report file.''\n", "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Vlachou Efstathiou, Malamatenia and Leroy, No\u00e9 and Maulu, Marco},\ndoi = {10.5281/zenodo.6126613},\ntitle = {git-project-Boccace}\n}\n", "_apa": "Vlachou Efstathiou M., Leroy N., Maulu M. git-project-Boccace (version 1.0). 
DOI: 10.5281/zenodo.6126613 Avant 1568, en pleine guerre de religion, Fran\u00e7ois Leroy, du parti du roi et des catholiques, participe \u00e0 la capture et la ran\u00e7on du prince de Cond\u00e9, du parti protestant. En 1568, Fran\u00e7ois Leroy, en tant que capitaine de 50 lances au service du roi, part en campagne avec lui. L'objectif est de transcrire cinq feuillets d'un manuscrit \u00e0 l'aide d'eScriptorium. Le but \u00e9tant d'apprendre \u00e0 utiliser git et github pour mener \u00e0 bien notre premier projet collaboratif.\n", "language": [ "frm" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1568", "notAfter": "1599" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "citation-file-link": "https://raw.githubusercontent.com/PSL-Chartes-HTR-Students/HN-2021-ChateauChavigny/main/CITATION.cff", "transcription-guidelines": "- Gestion des abbr\u00e9viations: \n - Si d\u00e9veloppement (pas toujours), les d\u00e9velopper entre crochets.\n - L'orthographe originale et les abr\u00e9viations doivent \u00eatre conserv\u00e9es.\n- Gestion des \u00e9checs de transcription de caract\u00e8re : lorsqu'un qu'un caract\u00e8re nous para\u00eet non sur, nous pr\u00e9f\u00e9rons mettre un [?] pour indiquer qu'il y a un caract\u00e8re non transcrit dans un mot. Pour plusieurs caract\u00e8res, faire autant de ? que de caract\u00e8re non reconnu : tel [???] 
pour 3 caract\u00e8res.\n", "volume": [ { "metric": "characters", "count": 9126 }, { "metric": "files", "count": 6 }, { "metric": "lines", "count": 253 }, { "metric": "regions", "count": 22 } ], "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Pascual, Margot and Franchet d'Esp\u00e8rey, Louis-Fiacre and Gabay, Simon},\ndoi = {10.5281/zenodo.6126655},\nmonth = {2},\ntitle = {Ch\u00e2teau de Chavigny},\nurl = {https://github.com/PSL-Chartes-HTR-Students/HN2021-ChateauChavigny},\nyear = {2022}\n}\n", "_apa": "Pascual M., Franchet d'Esp\u00e8rey L., Gabay S. (2022). Ch\u00e2teau de Chavigny (version 1.0). DOI: 10.5281/zenodo.6126655 URL: https://github.com/PSL-Chartes-HTR-Students/HN2021-ChateauChavigny Larose, 1893. \n", "language": [ "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1893", "notAfter": "1893" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "citation-file-link": "https://github.com/PSL-Chartes-HTR-Students/HN2021-Kovalewsky-1893/main/CITATION.CFF", "volume": [ { "metric": "characters", "count": 45626 }, { "metric": "files", "count": 28 }, { "metric": "lines", "count": 983 }, { "metric": "regions", "count": 72 } ], "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {L\u2019Eveque, Zo\u00e9 and Ekaterina, Kate and Kasparian, Anahide},\ndoi = {10.5281/zenodo.6126633},\nmonth = {2},\ntitle = {Projet Kovaleswky - 1893},\nurl = {https://github.com/PSL-Chartes-HTR-Students/HN2021-Kovalewsky-1893},\nyear = {2022}\n}\n", "_apa": "L\u2019Eveque Z., Ekaterina K., Kasparian A. (2022). Projet Kovaleswky - 1893 (version 1.0). 
DOI: 10.5281/zenodo.6126633 URL: https://github.com/PSL-Chartes-HTR-Students/HN2021-Kovalewsky-1893 Elles ont ensuite \u00e9t\u00e9 retranscrites par des b\u00e9n\u00e9voles anonymes ; c'est leur travail qui nous a servi de base pour corriger nos propres retranscriptions. Les documents sources choisis sont des lettres de diff\u00e9rents auteurs portant sur les obs\u00e8ques de Jane Lathrop Stanford. Les lettres s\u00e9lectionn\u00e9es \u00e9taient les lettres : 42, 43, 46, 49, 50, 54, 57 \u00e0 60, 69, 75, 76 [section 1, retranscrites par Perrine MAUREL] ; 80 \u00e0 93 [section 2, retranscrites par Ingrid GUIMAR\u00c3ES] ; 241 \u00e0 242 [section 3, retranscrites par Yagmur OZTURK].", "format": "Alto-XML", "hands": { "count": "1-per-file", "precision": "estimated" }, "institutions": [], "language": [ "eng" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-name": "ENC - Bonnes pratiques du developpement collaboratif", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "evenly-mixed", "time": { "notAfter": "1905", "notBefore": "1905" }, "title": "Memorials for Jane Lathrop Stanford", "transcription-guidelines": "Notre retranscription en elle-m\u00eame a cherch\u00e9 \u00e0 retranscrire le texte ipsis litteris, sans le corriger, en conservant donc les erreurs \u00e9ventuelles intrins\u00e8ques au document. Il convient toutefois de noter que dans certains cas, les documents pr\u00e9sentaient des mentions impr\u00e9cises qui n'avaient pas \u00e9t\u00e9 prises en compte par les retranscriptions originelles, ou alors qui avaient \u00e9t\u00e9 soulign\u00e9es comme \u00e9tant une retranscription incertaine. 
Nous avons alors fait le choix d'\u00eatre plus exhaustif que la retranscription originelle si possible, et nous avons parfois fait des choix de retranscription diff\u00e9rents sur la base de notre ressenti visuel lors du travail. En raison de ces choix, la taille d'une page s'est donc parfois av\u00e9r\u00e9e rallong\u00e9e par rapport \u00e0 l'estimation premi\u00e8re.\n\nAddition: les r\u00e8gles de transcriptions ont \u00e9t\u00e9 adapt\u00e9es pour \u00eatre compatibles avec les pr\u00e9conisations CREMMA/CATMuS, \u00e0 savoir : les portions de texte suscrites sont pr\u00e9c\u00e9d\u00e9es d'un \"^\", les mots barr\u00e9s ou illisible sont encadr\u00e9s des signes \"\u27e6\" et \"\u27e7\". Les zones ne sont pas trac\u00e9es dans le document, mais l'ontologie segmOnto a \u00e9t\u00e9 appliqu\u00e9e pour le typage des lignes, en suivant 5 types possibles: DefaultLine:Handwritten, DefaultLine:Print, DefaultLine:Typewritten, DefaultLine:Signature et InterlinearLine:Handwritten. Cela permet de distinguer ais\u00e9ment les lignes manuscrites ou tapuscrites des en-t\u00eates pr\u00e9imprim\u00e9es des papiers \u00e0 lettre.", "url": "https://github.com/PSL-Chartes-HTR-Students/HN2021-Memorials_Jane_Lathrop_Stanford", "volume": [ { "count": 18323, "metric": "characters" }, { "count": 41, "metric": "files" }, { "count": 774, "metric": "lines" }, { "count": 50, "metric": "regions" } ], "_bibtex": "@misc{YourReferenceHere,\nauthor = {Guimar\u00e3es, Ingrid and Maurel, Perrine and Ozturk, Yagmur and Chagu\u00e9, Alix},\ndoi = {10.5281/zenodo.6126625},\nmonth = {2},\ntitle = {Memorials for Jane Lathrop Stanford},\nyear = {2022}\n}\n", "_apa": "Guimar\u00e3es I., Maurel P., Ozturk Y., Chagu\u00e9 A. (2022). Memorials for Jane Lathrop Stanford (version 1.0). DOI: 10.5281/zenodo.6126625 Il s'agit d'un recueil de po\u00e8mes en corse et en fran\u00e7ais dont les th\u00e8mes varient. 
*A Muvra* est un journal autonomiste corse d'influence maurrassienne qui a exist\u00e9 pendant toute la p\u00e9riode de l'entre-deux-guerres. Se revendiquant comme \u00e9tant une revue culturelle, la dimension politique de la revue (incarn\u00e9e par le PCA, ou Partitu corsu d'azione), en a fait un mouvement controvers\u00e9. C'est dans ce contexte de lutte politique et d'\u00e9veil culturel corse que s'inscrit ce recueil.\nLe second ouvrage s'intitule *A nostra Santa Fede - Catechismu Corsu*, \u00e9crit par Ageniu Grimaldi en 1926 sous le pseudonyme de Saveriu Malaspina. Proche de Petru Rocca, ce dernier est l'un des th\u00e9oriciens de l'autonomisme corse de l'entre-deux-guerres et fid\u00e8le muvriste. Dans l'ouvrage, il est fait mention notamment de la fa\u00e7on dont un vrai corse doit se comporter vis-\u00e0-vis de sa foi envers Dieu et son \u00eele. Bien qu'il ne s'agisse pas r\u00e9ellement d'un recueil de po\u00e8mes, le style d'\u00e9criture de cet ouvrage est particuli\u00e8rement int\u00e9ressant. 
Il reprend un style qui se rapproche des \u00e9crits bibliques.\n", "format": "Alto-XML", "hands": { "count": "1-per-folder", "precision": "exact" }, "language": [ "cos", "fra" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "project-name": "ENC - Bonnes pratiques du developpement collaboratif", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1927", "notBefore": "1926" }, "title": "OCR Corse", "transcription-guidelines": "SegmOnto", "url": "https://github.com/PSL-Chartes-HTR-Students/HN2021-OCR-Poesie-Corse", "volume": [ { "count": 41205, "metric": "characters" }, { "count": 47, "metric": "files" }, { "count": 1681, "metric": "lines" }, { "count": 126, "metric": "regions" } ], "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Sarbach-Pulicani, Vincent and Miaille, Th\u00e9ophile and Escoda, Adrien and Sa\u00efag, Violette and Gabay, Simon},\ndoi = {10.5281/zenodo.6126641},\nmonth = {2},\ntitle = {OCR d'une po\u00e9sie corse},\nurl = {https://github.com/PSL-Chartes-HTR-Students/HN2021-OCR-Poesie-Corse},\nyear = {2022}\n}\n", "_apa": "Sarbach-Pulicani V., Miaille T., Escoda A., Sa\u00efag V., Gabay S. (2022). OCR d'une poésie corse (version 1.0). DOI: 10.5281/zenodo.6126641 URL: https://github.com/PSL-Chartes-HTR-Students/HN2021-OCR-Poesie-Corse Cette \u00e9num\u00e9ration et pr\u00e9sentation succincte des brevets est r\u00e9partie en deux colonnes et pr\u00e9sente des abr\u00e9viations normalis\u00e9es. 
D\u00e8s lors, ce pr\u00e9sent guide de contribution au projet entend pr\u00e9senter l\u2019ensemble des normes de transcriptions adopt\u00e9es au cours de ce projet de transcription, r\u00e9alis\u00e9 sur la plateforme E-scriptorium, dans le cadre du cours Git du master TNAH \u00e0 l\u2019ENC.\n", "language": [ "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1910", "notAfter": "1910" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "citation-file-link": "https://raw.githubusercontent.com/PSL-Chartes-HTR-Students/TNAH-2021-ArgusDesBrevets/main/CITATION.cff", "transcription-guidelines": "En premier lieu, nous avons d\u00e9cid\u00e9 de fonder notre transcription sur les recommandations publi\u00e9es dans l\u2019ouvrage *L\u2019\u00e9dition critique des textes contemporains, XIXe-XXIe si\u00e8cle*, par Christine Nougaret, Elisabeth Parinet et Florence Clavaud. N\u00e9anmoins, certaines adaptations ont \u00e9t\u00e9 n\u00e9cessaires afin de fournir un jeu de donn\u00e9es issue de la transcription, qui soit \u00e0 la fois proche du document source et exploitable par la suite. Ainsi, concernant les abr\u00e9viations, nous avons d\u00e9cid\u00e9 de conserver la graphie originale au sein de la transcription. Ce choix fut guid\u00e9 par deux \u00e9l\u00e9ments : d\u2019une part, la volont\u00e9 de conserver une graphie int\u00e8gre, afin de fournir aux chercheurs s\u2019int\u00e9ressant \u00e0 ce sujet un texte facilement exploitable de mani\u00e8re automatique, comme par exemple une analyse quantitative des types de soci\u00e9t\u00e9s (anonymes, familiales,\u2026) d\u00e9posant des brevets. Cette d\u00e9cision fut motiv\u00e9e par la facilit\u00e9 de r\u00e9solution et compr\u00e9hension des abr\u00e9viations par le lecteur. 
D'autre part, il nous semble que cette approche permettrait une r\u00e9utilisation g\u00e9n\u00e9rales des donn\u00e9es, telle qu'un processus d'apprentissage machine.\nNous avons \u00e9t\u00e9 amen\u00e9 \u00e0 r\u00e9aliser certains choix relevant de la transcription et de l\u2019\u00e9dition du document. Pour ce faire, nous nous sommes r\u00e9f\u00e9r\u00e9 au *Lexique typographique en usage \u00e0 l\u2019Imprimerie nationale* : - les tirets en fin de ligne faisant la c\u00e9sure au sein des mots ont \u00e9t\u00e9 r\u00e9tablis (ex : direc-tion). - les num\u00e9ros de page en haut de page ont \u00e9t\u00e9 transcris ainsi : \u00ab _ N _ \u00bb o\u00f9 N correspond au num\u00e9ro de page. - en cas de caract\u00e8res mal imprim\u00e9s ou us\u00e9s, ceux-ci ont \u00e9t\u00e9 r\u00e9tablis dans la mesure o\u00f9 ils sont facilement interpr\u00e9tables (mais non devinables) par le lecteur. \n", "volume": [ { "metric": "characters", "count": 55156 }, { "metric": "files", "count": 17 }, { "metric": "lines", "count": 1962 }, { "metric": "regions", "count": 86 } ], "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {De Craene, Valentin and Humeau, Maxime and Reignier, Virgile},\ndoi = {10.5281/zenodo.6126366},\nmonth = {1},\ntitle = {Projet Argus des Brevets},\nurl = {https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-ArgusDesBrevets},\nyear = {2022}\n}\n", "_apa": "De Craene V., Humeau M., Reignier V. (2022). Projet Argus des Brevets (version 1.0). DOI: 10.5281/zenodo.6126366 URL: https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-ArgusDesBrevets Ce manuscrit contient la traduction fran\u00e7aise du Decameron de Boccace par Laurent de Premierfait. 
Nos v\u00e9rit\u00e9s de terrain recouvrent la description de la peste \u00e0 Florence situ\u00e9e dans le prologue de l'ouvrage.\n", "format": "Alto-XML", "hands": { "count": "1", "precision": "exact" }, "language": [ "frm" ], "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "project-name": "ENC - Bonnes pratiques du developpement collaboratif\n", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1455", "notBefore": "1430" }, "title": "DecameronFR", "transcription-guidelines": "Cf. https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-DecameronFR/blob/main/normesTranscription.md\n", "url": "https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-DecameronFR", "volume": [ { "count": 19821, "metric": "characters" }, { "count": 9, "metric": "files" }, { "count": 751, "metric": "lines" }, { "count": 41, "metric": "regions" } ], "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Biay, S\u00e9bastien and Boby, Victor and Konstantinova, Kristina and Cappe, Zo\u00e9},\ndoi = {10.5281/zenodo.6126376},\ntitle = {TNAH-2021-DecameronFR},\nurl = {https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-DecameronFR}\n}\n", "_apa": "Biay S., Boby V., Konstantinova K., Cappe Z. TNAH-2021-DecameronFR (version 1.0). DOI: 10.5281/zenodo.6126376 URL: https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-DecameronFR (2022). Projet Exposition Universelle de 1878 (version 1.0). 
DOI: 10.5281/zenodo.6126447 URL: https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-Expositions_Universelles L\u2019ensemble des lettres adress\u00e9es \u00e0 Nanci Berlioz repr\u00e9sentait un volume trop important pour notre projet, aussi nous les avons s\u00e9lectionn\u00e9es, par souci de coh\u00e9rence, selon un ordre chronologique (voir le tableau de gestion) pour la liste exacte des lettres transcrites).\n", "language": [ "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1823", "notAfter": "1844" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "citation-file-link": "https://raw.githubusercontent.com/PSL-Chartes-HTR-Students/TNAH-2021-Projet-Correspondance-Berlioz/main/CITATION.cff", "transcription-guidelines": "**Orthographe :** - Aucune modification op\u00e9r\u00e9e sur l'orthographe, m\u00eame en pr\u00e9sence de fautes. - L'orthographe ancienne est laiss\u00e9e telle quelle. - Aucune restitution des accents manquants. Aucune correction des accents fautifs. Restitution de la bonne graphie de l'accent, lorsque nous consid\u00e9rons qu'il y a une variation de la graphie de celui-ci \u00e0 cause de la rapidit\u00e9 d'\u00e9criture. - Aucune restitution des traits d'union manquants. - S\u00e9paration des mots coll\u00e9s d\u00e8s lors que la ligature entre ces mots semble due \u00e0 la rapidit\u00e9 de l'\u00e9criture.\n**Abr\u00e9viations :** - Aucune r\u00e9solution d'abr\u00e9viation. - Utilisation du symbole mon\u00e9taire de la livre tournois \u2192 **\u20b6** (Unicode U+20B6).\n**Mots en exposant :** - Restitution seulement du mot sans le mettre en exposant.\n**Majuscules et minuscules :** - Aucune restitution des majuscules, m\u00eame lorsqu'elles sont absentes en d\u00e9but de phrase ou de nom propre.\n**Ponctuation :** - Aucune restitution de la ponctuation manquante. 
Aucune correction de la ponctuation fautive. - Emploi du tiret cadratin (\u2014, unicode U+2014) de part et d'autre d'une incise. - Emploi du tiret demi-cadratin (\u2013, unicode U+2013) pour marquer le changement d\u2019interlocuteur dans les dialogues et devant les \u00e9l\u00e9ments des listes/ \u00e9num\u00e9rations.\n", "volume": [ { "metric": "characters", "count": 13474 }, { "metric": "files", "count": 16 }, { "metric": "lines", "count": 367 }, { "metric": "regions", "count": 64 } ], "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Ceard, Lien and Lebreton, Fanny and Sajdak, C\u00e9cile},\ndoi = {10.5281/zenodo.6126475},\nmonth = {1},\ntitle = {Projet Correspondance Berlioz},\nurl = {https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-Projet-Correspondance-Berlioz},\nyear = {2022}\n}\n", "_apa": "Ceard L., Lebreton F., Sajdak C. (2022). Projet Correspondance Berlioz (version 1.0). DOI: 10.5281/zenodo.6126475 URL: https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-Projet-Correspondance-Berlioz Celle-ci a \u00e9t\u00e9 effectu\u00e9e sur eScriptorium \u00e0 partir de la num\u00e9risation des journaux des travaux (https://mediatheque-patrimoine.culture.gouv.fr/travaux-de-notre-dame-de-paris-1844-1865) r\u00e9alis\u00e9e par la M\u00e9diath\u00e8que de l'architecture et du patrimoine. 
\n", "language": [ "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1860", "notAfter": "1860" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "citation-file-link": "https://raw.githubusercontent.com/PSL-Chartes-HTR-Students/TNAH-2021-Projet-Notre-Dame/main/CITATION.cff", "transcription-guidelines": "- respect des majuscules et minuscules - respect des ligatures (par exemple, transcrire \"ch\u0153ur\") - mot qui est barr\u00e9 : \u96be (une seule fois par mot) mais seulement s'ils sont totalement/\u00e0 moiti\u00e9 illisibles. Les retranscrire entre accolades {} s'ils sont lisibles. - Pour mettre en exergue les doutes de transcription : \n - mot incertain: [incertain]\n - mot que l'on ne parvient pas \u00e0 transcrire : [??]\n", "volume": [ { "metric": "characters", "count": 29286 }, { "metric": "files", "count": 12 }, { "metric": "lines", "count": 735 }, { "metric": "regions", "count": 86 } ], "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Doat, Soline and Falcoz, Elsa and Faure, Margaux and Mazou\u00e9, Ana\u00efs and Menu, Ariane},\ndoi = {10.5281/zenodo.6126491},\nmonth = {1},\ntitle = {Projet Notre-Dame},\nurl = {https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-Projet-Notre-Dame},\nyear = {2022}\n}\n", "_apa": "Doat S., Falcoz E., Faure M., Mazou\u00e9 A., Menu A. (2022). Projet Notre-Dame (version 1.0). 
DOI: 10.5281/zenodo.6126491 URL: https://github.com/PSL-Chartes-HTR-Students/TNAH-2021-Projet-Notre-Dame Images are available on request by writing to: pauline.jacsont [ at ] unige.ch.", "project-name": "FoNDUE", "language": [ "lat" ], "production-software": "eScriptorium + Kraken", "script": [ { "iso": "Latn" }, { "iso": "Grek" } ], "script-type": "only-manuscript", "time": { "notBefore": "1561", "notAfter": "1570" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "sources": [ { "reference": "", "link": "http://katalog.burgerbib.ch/detail.aspx?ID=340662" } ], "volume": [ { "metric": "pages", "count": 49 } ], "citation-file-link": "https://github.com/PaulineJac/GasparoSardiToponomasia/blob/main/HTR/CITATION.cff", "transcription-guidelines": " The transcriptions were made following the rules of the github cremma-medieval repository - https://github.com/HTR-United/cremma-medieval. The transcription is strictly diplomatic and graphematic. No abbreviations are resolved, no standardization of 'i' and 'v' with ramist letters, and accents, punctuation, spaces, and line breaks are strictly adhered to. Following Leiden conventions, crossed-out elements are transcribed with double brackets \u27e6\u27e7, and elements that are illegible in the picture will not be restored but indicated by this type of bracket \u27e8 \u27e9. 
Special characters are encoded according to the MUFI fonts.", "automatically-aligned": false, "_pid": "a45f799c4" }, "4522813d9": { "authors": [ { "name": "Dubois", "roles": [ "project-manager" ], "surname": "Alain" }, { "name": "Cl\u00e9rice", "roles": [ "project-manager", "quality-control" ], "surname": "Thibault" }, { "name": "Rudaz", "roles": [ "transcriber" ], "surname": "Clemence" }, { "name": "Schlaeppi", "roles": [ "transcriber" ], "surname": "Darius" }, { "name": "Mamie", "roles": [ "transcriber" ], "surname": "Delphine" }, { "name": "Schmied", "roles": [ "support" ], "surname": "Marie-Caroline" } ], "characters": { "members": [ "e", "1", "a", "i", "r", "l", "n", "s", "t", "o", "u", "8", "c", "/", "h", "\"", "d", "2", "m", "M", "b", "f", "g", "V", "3", "6", "4", "5", "F", "J", "p", "7", "v", "A", "S", "0", "\u0327", "\u0300", "\u0301", "z", "y", "C", "B", "9", "D", "L", ".", "W", "P", "G", "E", "T", "\u0336", "R", "H", "N", "O", "\u0308", "x", "I", "K", "k", "w", "\u00b0", "q", "-", "j", "\u0302", "?", "Z", "'", "_", "^", "\u0335", "X", "U", "(", ")", "=", ",", "Q", ":", "<", ">", "\u0153", "!", "&", "[", "]", "\u15c5", "\u00a8", "*", "\u00a7", "}", "\\", "+", "#" ], "mode": "NFD" }, "citation-file-link": "https://raw.githubusercontent.com/PonteIneptique/valais-recensement/main/CITATION.CFF", "description": "Ensemble de formulaires de recensement", "format": "Alto-XML", "hands": { "count": "1-per-file", "precision": "exact" }, "institutions": [ { "name": "Archives du Valais", "roles": [ "digitization" ] } ], "language": [ "fra", "deu" ], "license": [ { "name": "CC-BY-NC 4.0", "url": "https://creativecommons.org/licenses/by-nc/4.0/" } ], "production-software": "eScriptorium + Kraken", "project-name": "Valais Time Machine", "project-website": "https://www.timemachinevs.ch/", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notAfter": "1890", "notBefore": 
"1870" }, "title": "Recensement Valaisan (Valais Time Machine)", "transcription-guidelines": "- Superscript are transcribed with a ^ before the string.\n- Transcription is faithful: nothing is corrected.\n- Checkmarks in table are transcribed as `/`. Some checkmarks looking character can be transcribed as `1` if the 1 in the dates looks the same\n- Printed part of the form is not transcribed.\n- Only `Col` and `Header` regions are used for table segmentation. If a Signature is at the bottom, we also use `Signature`", "url": "https://github.com/PonteIneptique/valais-recensement", "volume": [ { "count": 282260, "metric": "characters" }, { "count": 915, "metric": "files" }, { "count": 59368, "metric": "lines" }, { "count": 34083, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Alain, Dubois and Cl\u00e9rice, Thibault and Mamie, Delphine and Darius, Schlaeppi and Rudaz, Cl\u00e9mence and Schmied, Marie-Caroline},\ntitle = {Tables du recensement du Valais},\nurl = {https://github.com/PonteIneptique/valais-recensement}\n}\n", "_apa": "Alain D., Cl\u00e9rice T., Mamie D., Darius S., Rudaz C., Schmied M. 
Tables du recensement du Valais URL: https://github.com/PonteIneptique/valais-recensement They are archived in the \u2019Colecci\u00f3n manuscritos' of the Archivo Central Andres Bello - Universidad de Chile.", "language": [ "spa" ], "production-software": "eScriptorium + Kraken", "script": [ { "iso": "Latn" } ], "script-type": "mainly-manuscript", "time": { "notBefore": "1859", "notAfter": "1877" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": [ { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 117155 }, { "metric": "files", "count": 180 }, { "metric": "lines", "count": 3932 }, { "metric": "regions", "count": 981 } ], "transcription-guidelines": "- xxx for erased or unreadable characters\n- ^+letters for superscript letters\n- \u204b for new paragraph\n", "characters": { "mode": "NFD", "members": [ "e", "a", "o", "n", "s", "r", "i", "d", "l", "u", "t", "c", "m", "p", "q", "b", "\u0301", "g", ".", "h", ",", "\u204b", "v", "-", "f", "y", "S", "C", "0", "^", "A", "j", "U", "1", "z", "x", "D", "M", "\u0303", "E", "2", "L", "P", "N", "8", "V", "J", "B", "T", "G", "6", "I", "5", "3", ":", "9", "4", "H", "R", "7", ";", "O", "\u201c", "\u00ba", "\u201d", "F", "Q", "Y", "\u0304", "*", "_", "=", "$", "(", "\"", ")", "\u00bf", "/", "\u0300", "?", "\u0308", "\u00a1", "!", "{", "~", "}", "&", "W", "Z", "\u2018", "\u2019", "K", "[", "]" ] }, "automatically-aligned": false, "_pid": "3a60c6704" }, "96463e268": { "authors": [ { "name": "Sonia", "orcid": "0009-0009-7367-048X", "roles": [ "transcriber", "project-manager", "quality-control" ], "surname": "Solfrini" }, { "name": "Simon", "orcid": "0000-0001-9094-4475", "roles": [ "support" ], "surname": "Gabay" }, { "name": "Genevi\u00e8ve", "orcid": "0009-0006-5367-4262", "roles": [ "transcriber", "project-manager", "quality-control" ], "surname": "Gross" }, { "name": "Pierre-Olivier", "orcid": 
"0009-0009-2475-6017", "roles": [ "transcriber", "quality-control" ], "surname": "Beaulnes" }, { "name": "Aur\u00e9lia", "orcid": "0009-0009-9678-9811", "roles": [ "transcriber", "quality-control" ], "surname": "Marques Oliveira" }, { "name": "Daniela", "orcid": "0000-0002-2601-668X", "roles": [ "project-manager" ], "surname": "Solfaroli Camillocci" } ], "characters": { "members": [ "e", "s", "u", "a", "i", "n", "t", "r", "o", "l", "c", "d", "p", "m", ".", ",", "f", "q", "g", "\u0303", "y", "b", "h", "/", "z", "\u204a", "\u00ac", ":", "C", "D", "x", "E", "I", "P", "L", "S", "1", "A", "M", "Q", "2", "U", "?", "3", "N", "T", "4", "O", "\u0365", "B", "R", "\ua770", "H", "6", "5", "\u036c", "G", "8", "F", "(", ")", "0", "9", "\u00b6", "7", "\u25ca", "\ua753", "\u00a0", "\ua751", "\u1455", "V", "-", "Y", ";", "\u15de", "J", "k", "\u0300", "\ua76f", "Z", "v" ], "mode": "NFD" }, "citation-file-link": "https://github.com/SETAFDH/HTR-SETAF-Jean-Michel/blob/main/CITATION.cff", "description": "OCR data for the SETAF project, 16th-century French prints in Gothic characters.", "format": "Alto-XML", "hands": { "count": "1", "precision": "exact" }, "language": [ "fra" ], "license": { "type": "CC-BY", "version": 4.0 }, "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://www.unige.ch/lettres/humanites-numeriques/recherche/projets-de-la-chaire/fondue", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1600", "notBefore": "1500" }, "title": "HTR-SETAF-Jean-Michel", "transcription-guidelines": "Our data follow SegmOnto segmentation standards (https://segmonto.github.io). Our transcription guidelines follow a graphematic approach, without regularisation. We keep the original punctuation and abbreviations. 
A detailed presentation of our rules is available on HAL (https://hal.science/hal-04281804).", "url": "https://github.com/SETAFDH/HTR-SETAF-Jean-Michel", "volume": [ { "count": 286256, "metric": "characters" }, { "count": 404, "metric": "files" }, { "count": 11778, "metric": "lines" }, { "count": 1365, "metric": "regions" } ], "_pid": "96463e268" }, "9d5c1595c": { "authors": [ { "name": "Sonia", "orcid": "0009-0009-7367-048X", "roles": [ "transcriber", "project-manager", "quality-control" ], "surname": "Solfrini" }, { "name": "Simon", "orcid": "0000-0001-9094-4475", "roles": [ "support" ], "surname": "Gabay" }, { "name": "Genevi\u00e8ve", "orcid": "0009-0006-5367-4262", "roles": [ "transcriber", "project-manager", "quality-control" ], "surname": "Gross" }, { "name": "Pierre-Olivier", "orcid": "0009-0009-2475-6017", "roles": [ "transcriber", "quality-control" ], "surname": "Beaulnes" }, { "name": "Aur\u00e9lia", "orcid": "0009-0009-9678-9811", "roles": [ "transcriber", "quality-control" ], "surname": "Marques Oliveira" }, { "name": "Daniela", "orcid": "0000-0002-2601-668X", "roles": [ "project-manager" ], "surname": "Solfaroli Camillocci" } ], "characters": { "members": [ "e", "s", "u", "i", "a", "t", "n", "r", "o", "l", "c", "d", "p", "m", "/", ".", "q", "f", "\u204a", "\u0303", "y", "g", "h", "z", "b", "\u00ac", "x", "I", ":", "C", "E", "P", "1", "\u00b6", "L", "D", "S", "2", "A", "\ua770", "M", "R", "\u0365", "3", "N", "?", "4", "Q", "T", "6", "\u036c", "7", "H", "8", "5", "0", "9", "U", "B", "G", "O", "F", ")", "(", "\ua751", "\ua753", "v", "J", "-", "\ua76b", "\u0142", "\ua76f", "Z", "k", "K", "\u15c5", "\u00f0", "\ua757", "\u0308", "\u25ca", ",", "V", "\u1455", "j" ], "mode": "NFD" }, "citation-file-link": "https://github.com/SETAFDH/HTR-SETAF-LesFaictzJCH/blob/main/CITATION.cff", "description": "OCR data for the SETAF project, 16th-century French prints in Gothic characters.", "format": "Alto-XML", "hands": { "count": "1", "precision": "exact" }, "language": 
[ "fra" ], "license": { "type": "CC-BY", "version": 4.0 }, "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://www.unige.ch/lettres/humanites-numeriques/recherche/projets-de-la-chaire/fondue", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1600", "notBefore": "1500" }, "title": "HTR-SETAF-LesFaictzJCH", "transcription-guidelines": "Our data follow SegmOnto segmentation standards (https://segmonto.github.io). Our transcription guidelines follow a graphematic approach, without regularisation. We keep the original punctuation and abbreviations. A detailed presentation of our rules is available on HAL (https://hal.science/hal-04281804).", "url": "https://github.com/SETAFDH/HTR-SETAF-LesFaictzJCH", "volume": [ { "count": 205523, "metric": "characters" }, { "count": 144, "metric": "files" }, { "count": 4765, "metric": "lines" }, { "count": 485, "metric": "regions" } ], "_pid": "9d5c1595c" }, "1f8e04f53": { "authors": [ { "name": "Sonia", "orcid": "0009-0009-7367-048X", "roles": [ "transcriber", "project-manager", "quality-control" ], "surname": "Solfrini" }, { "name": "Simon", "orcid": "0000-0001-9094-4475", "roles": [ "support" ], "surname": "Gabay" }, { "name": "Genevi\u00e8ve", "orcid": "0009-0006-5367-4262", "roles": [ "transcriber", "project-manager", "quality-control" ], "surname": "Gross" }, { "name": "Pierre-Olivier", "orcid": "0009-0009-2475-6017", "roles": [ "transcriber", "quality-control" ], "surname": "Beaulnes" }, { "name": "Aur\u00e9lia", "orcid": "0009-0009-9678-9811", "roles": [ "transcriber", "quality-control" ], "surname": "Marques Oliveira" }, { "name": "Daniela", "orcid": "0000-0002-2601-668X", "roles": [ "project-manager" ], "surname": "Solfaroli Camillocci" } ], "characters": { "members": [ "e", "s", "u", "i", "a", "t", "r", "o", "n", "l", "c", "d", "p", "m", "\u0303", ".", "/", "q", "f", 
"y", "g", "h", "b", "\u204a", "z", "\u00ac", ":", "x", "C", "I", "1", "E", "L", "D", "P", "A", "\u00b6", "\u0365", "M", "2", "S", "3", "?", "\u036c", "Q", "4", "N", "T", "\ua770", "5", "*", "U", "R", "6", "0", ",", "7", "H", "8", "O", "9", "(", ")", "\ua753", "G", "B", "\ua751", "F", "\u0308", "\ua76f", "-", "\u0142", "\u25ca", "\u00f0", "\u00a0", "\ua75d", "Z", "v", "Y", "k", "'", "K", "\u0300", "X", "\u0301", "\ua76b", "V", "J", "\ua759", "w", ";", "\ua757", "\u0307", "\u1d49", "\u030c" ], "mode": "NFD" }, "citation-file-link": "https://github.com/SETAFDH/HTR-SETAF-Pierre-de-Vingle/blob/main/CITATION.cff", "description": "OCR data for the SETAF project, 16th-century French prints in Gothic characters.", "format": "Alto-XML", "hands": { "count": "1", "precision": "exact" }, "language": [ "fra" ], "license": { "type": "CC-BY", "version": 4.0 }, "production-software": "eScriptorium + Kraken", "project-name": "FoNDUE", "project-website": "https://www.unige.ch/lettres/humanites-numeriques/recherche/projets-de-la-chaire/fondue", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notAfter": "1600", "notBefore": "1500" }, "title": "HTR-SETAF-Pierre-de-Vingle", "transcription-guidelines": "Our data follow SegmOnto segmentation standards (https://segmonto.github.io). Our transcription guidelines follow a graphematic approach, without regularisation. We keep the original punctuation and abbreviations. 
A detailed presentation of our rules is available on HAL (https://hal.science/hal-04281804).", "url": "https://github.com/SETAFDH/HTR-SETAF-Pierre-de-Vingle", "volume": [ { "count": 1708929, "metric": "characters" }, { "count": 1833, "metric": "files" }, { "count": 64295, "metric": "lines" }, { "count": 8537, "metric": "regions" } ], "_pid": "1f8e04f53" }, "d26cd8486": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Moonshines", "url": "https://github.com/alix-tz/moonshines", "authors": [ { "name": "Alix", "surname": "Chagu\u00e9", "orcid": "0000-0002-0136-4434", "roles": [ "transcriber", "aligner", "project-manager", "digitization" ] } ], "institutions": [], "description": "This dataset is composed of pages of text written in 2023 by a single person, copying texts taken from Guillaume Apollinaire's poems published in Alcools, and taken from Guillaume Apollinaire's Wikipedia page.", "language": [ "fra" ], "production-software": "eScriptorium + Kraken", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "2023", "notAfter": "2023" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 27734 }, { "metric": "files", "count": 45 }, { "metric": "lines", "count": 1016 }, { "metric": "regions", "count": 45 } ], "citation-file-link": "https://github.com/alix-tz/moonshines/blob/master/CITATION.cff", "transcription-guidelines": "The transcription strictly follows what is written on the images, including accentuation or capitalization errors. The segmentation follows the SegmOnto ontology and mostly relies on MainZone and DefaultLine. 
Beware that this dataset barely contains any punctuation and that most lines begin with a capital letter.", "characters": { "mode": "NFD", "members": [ "e", "s", "a", "n", "r", "i", "t", "u", "o", "l", "d", "m", "c", "p", "\u0301", "'", "v", "g", "b", "h", "\u0300", "f", "L", "q", "E", "1", "A", "C", "x", "y", "\u0302", "S", "9", "P", "M", "j", "T", "D", "-", "N", "J", "R", "0", "z", "O", "I", "2", "8", "V", "F", "G", "U", "5", "B", "Q", ")", "H", "3", "(", "7", "6", "w", "k", "4", "\u0327", "K", "Z", "\u0308", "Y", "{", "}", "W", ".", "X", "," ] }, "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Chagu\u00e9, Alix},\ndoi = {0.5281/zenodo.607720783},\nmonth = {2},\ntitle = {moonshines},\nurl = {https://github.com/alix-tz/moonshines},\nyear = {2023}\n}\n", "_apa": "Chagu\u00e9 A. (2023). moonshines (version 2.0.0). DOI: 0.5281/zenodo.607720783 URL: https://github.com/alix-tz/moonshines Les textes suscrits ne sont pas signal\u00e9s. Ce qui est \u00e9crit est transcrit. S'il y a des incertitudes, la ligne est laiss\u00e9e vide. La segmentation de certains documents ne convient pas pour l'entra\u00eenement d'un mod\u00e8le de segmentation. L'ontologie SegmOnto a \u00e9t\u00e9 utilis\u00e9e. Quand les mots ajout\u00e9s sont ins\u00e9r\u00e9s par un '\u22ce', ce graph\u00e8me est transcrit par un \u22ce.", "url": "https://github.com/alix-tz/peraire-ground-truth", "volume": [ { "count": 97505, "metric": "characters" }, { "count": 67, "metric": "files" }, { "count": 2307, "metric": "lines" }, { "count": 151, "metric": "regions" } ], "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Chagu\u00e9, Alix and P\u00e9rez, Gilles},\ndoi = {10.5281/zenodo.7185907},\nmonth = {6},\ntitle = {Peraire Ground Truth},\nurl = {https://github.com/alix-tz/peraire-ground-truth},\nyear = {2023}\n}\n", "_apa": "Chagu\u00e9 A., P\u00e9rez G. (2023). Peraire Ground Truth (version 2.0.0). 
DOI: 10.5281/zenodo.7185907 URL: https://github.com/alix-tz/peraire-ground-truth Images come from three manuscripts selected among the collections of the BULAC Library (Paris). It covers a representative part of the handwritten production in Arabic Maghrebi scripts and includes an annotation of the layout (TextRegions, baselines and polygons) and the transcription of the main text. This dataset is the result of a collaborative transcription. All the participants are credited on the official deposit. With the support of the French Ministry of Higher Education, Research and Innovation, the Research Consortium Middle-East and Muslim Worlds (GIS MOMM), Calfa and the BULAC library.\n", "language": [ "ara" ], "script": [ { "iso": "Arab" } ], "script-type": "only-manuscript", "time": { "notBefore": "1700", "notAfter": "1899" }, "hands": { "count": "less-than-11", "precision": "exact" }, "license": [ { "name": "Apache-2.0 License", "url": "https://www.apache.org/licenses/LICENSE-2.0" } ], "format": "Page-XML", "volume": [ { "metric": "pages", "count": 300 }, { "count": 7540, "metric": "lines" }, { "count": 300, "metric": "files" }, { "count": 676, "metric": "regions" }, { "count": 403034, "metric": "characters" } ], "sources": [ { "reference": "Vidal-Gor\u00e8ne, C., Lucas, N., Salah, C., Decours-Perez, A., & Dupin, B. (2021, September). RASAM\u2013A Dataset for the Recognition and Analysis of Scripts in Arabic Maghrebi. In International Conference on Document Analysis and Recognition (pp. 265-281). 
Springer, Cham", "link": "https://link.springer.com/chapter/10.1007/978-3-030-86198-8_19" } ], "transcription-guidelines": "Full description of specifications for transcription available on Github and in the paper.\n", "production-software": "Calfa Vision", "automatically-aligned": false, "_pid": "983e367d9" }, "8df2bf945": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "TariMa", "url": "https://github.com/calfa-co/tarima", "authors": [ { "name": "Antoine", "surname": "Perrier", "orcid": "0000-0002-5035-4283", "roles": [ "project-manager" ] } ], "institutions": [ { "name": "BULAC", "roles": [ "project-manager" ] } ], "description": "The dataset has been collated within the frame of the TariMa project (Tarih al-Maghrib. Writing History in the Maghreb in the modern and contemporary era), sponsored by the French agency Collex-Persee and supervised by Antoine Perrier (CNRS). It comprises different image resolutions and sizes (width from 982px to 8049px), different layouts (double page, multiple columns), and states of conservation. It also mixes microfilms, scans and lithographs. 
It presents a very wide variety representative of the Maghrebi Arabic production.", "project-website": "https://www.collexpersee.eu/projet/tarima/", "language": [ "ara" ], "production-software": "Calfa Vision", "script": [ { "iso": "Arab", "qualify": "Maghrebi" } ], "script-type": "mainly-manuscript", "time": { "notBefore": "1500", "notAfter": "1899" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Page-XML", "sources": [ { "reference": "", "link": "https://github.com/calfa-co/tarima" } ], "volume": [ { "metric": "files", "count": 120 }, { "metric": "lines", "count": 2673 }, { "metric": "characters", "count": 146667 } ], "transcription-guidelines": "We follow the RASAM guidelines for the transcription of Arabic Maghrebi manuscripts.", "automatically-aligned": false, "_pid": "8df2bf945" }, "470bdd3df": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "OCR17plus", "url": "https://github.com/e-ditiones/OCR17plus", "project-name": "E-ditiones", "project-website": "https://e-ditiones.huma-num.fr/", "authors": [ { "name": "Gabay", "surname": "Simon", "roles": [ "transcriber", "project-manager", "support" ] }, { "name": "Jahan", "surname": "Claire", "roles": [ "transcriber", "aligner" ] } ], "description": "Imprim\u00e9s classiques", "language": [ "frm" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1600", "notAfter": "1700" }, "hands": { "count": "1-per-folder", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "count": 25628, "metric": "lines" }, { "count": 965, "metric": "files" }, { "count": 3923, "metric": "regions" }, { "count": 686335, "metric": "characters" } ], "production-software": "Transkribus", "automatically-aligned": false, "_bibtex": 
"@misc{YourReferenceHere,\nauthor = {Jahan, Claire and Gabay, Simon},\ndoi = {none},\nmonth = {7},\ntitle = {OCR17+ - Layout analysis and text recognition for 17th c. French prints},\nurl = {https://github.com/e-ditiones/OCR17plus},\nyear = {2021}\n}\n", "_apa": "Jahan C., Gabay S. (2021). OCR17+ - Layout analysis and text recognition for 17th c. French prints (version 1.0). DOI: none URL: https://github.com/e-ditiones/OCR17plus\n", "_pid": "470bdd3df" }, "c06c6e7bb": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "GenAuto TD Corpus", "url": "https://github.com/jpmjpmjpm/genauto-td-htr.git", "project-name": "GenAuto", "project-website": "", "authors": [ { "name": "Boutet", "surname": "Jean-Fran\u00e7ois", "roles": [ "transcriber", "aligner" ] }, { "name": "Merx", "surname": "Jean-Pierre", "roles": [ "transcriber", "aligner", "project-manager" ] } ], "description": "150 transcribed images from \"Tables D\u00e9cennales\" French Civil Registry. Those come from Sermaises and Romilly-sur-Seine municipalities.\n", "language": [ "fra" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1792", "notAfter": "1902" }, "hands": { "count": "less-than-11", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "count": 300, "metric": "pages" }, { "count": 150, "metric": "images" }, { "count": 150, "metric": "files" }, { "count": 186366, "metric": "characters" }, { "count": 21557, "metric": "lines" }, { "count": 608, "metric": "regions" } ], "production-software": "eScriptorium + Kraken", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Boutet, Jean-Fran\u00e7ois and Merx, Jean-Pierre},\ndoi = {10.5281/zenodo.5507403},\nmonth = {9},\ntitle = {GenAuto TD Corpus},\nurl = {https://github.com/jpmjpmjpm/genauto-td-htr.git},\nyear = {2021}\n}\n", "_apa": "Boutet J., Merx J. 
(2021). GenAuto TD Corpus (version 1.0.0). DOI: 10.5281/zenodo.5507403 URL: https://github.com/jpmjpmjpm/genauto-td-htr.git Many transcriptions in this dataset were generated by a small team of anonymous volunteers as part of the Joseph Hooker Correspondence Project based at Kew Gardens. All images in this dataset are reproduced with the kind permission of the Board of Trustees of the Royal Botanic Gardens Kew (\u00a9 RBG, Kew). Contact archives@kew.org for more information.\n\nHTR Model: Schaefer, John, & Litvine, Alexis. (2023). Joseph Hooker HTR Model. Zenodo. https://doi.org/10.5281/zenodo.8038689", "project-name": "Joseph Hooker Correspondence Project", "project-website": "https://www.kew.org/science/our-science/projects/joseph-hooker-correspondence-project", "language": [ "eng" ], "production-software": "Transkribus", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1850", "notAfter": "1911" }, "hands": { "count": "1", "precision": "estimated" }, "license": [ { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" } ], "format": "Page-XML", "volume": [ { "metric": "lines", "count": 7100 }, { "metric": "files", "count": 337 }, { "metric": "pages", "count": 337 } ], "transcription-guidelines": "All horizontal lines in Hooker's hand were transcribed as originally written. 
Most typescript and vertical lines in the margins were not included.", "automatically-aligned": false, "_pid": "cc159d01a" }, "f95039357": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "NuBIS-OCR", "url": "https://github.com/ksefil/NuBIS-OCR", "authors": [ { "name": "Kutay", "surname": "Sefil", "roles": [ "transcriber" ] } ], "institutions": [], "description": "Ground truth dataset for a selection of printed books from NuBIS, the digital library of the Biblioth\u00e8que Interuniversitaire de la Sorbonne.", "language": [ "fra", "lat" ], "production-software": "eScriptorium + Kraken", "automatically-aligned": false, "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1602", "notAfter": "1989" }, "hands": { "count": "unknown", "precision": "exact" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Alto-XML", "sources": [ { "reference": "", "link": "https://nubis.bis-sorbonne.fr/" } ], "volume": [ { "metric": "pages", "count": 57 } ], "_pid": "f95039357" }, "a23a0f5a1": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Eutyches", "url": "https://github.com/malamatenia/Eutyches", "authors": [ { "name": "Vlachou Efstathiou", "surname": "Malamatenia", "roles": [ "transcriber", "aligner", "project-manager" ] } ], "institutions": [], "description": "Ground truth for minuscule caroline of the late 9th century from the grammatical work \"de uerbo\" of Eutych\u00e8s. 
", "project-name": "Eutyches grammaticus glossed", "language": [ "lat", "grc" ], "production-software": "eScriptorium + Kraken", "script": [ { "iso": "Latn", "qualify": "Minuscule Caroline" } ], "script-type": "only-manuscript", "time": { "notBefore": "850", "notAfter": "900" }, "hands": { "count": "less-than-11", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "sources": [ { "reference": "Codices Vossiani Latini, Brill , VLO41", "link": "https://primarysources.brillonline.com/browse/vossiani-latini/vlo-041-eutyches-grammaticalia-isidorus-alphabeta" } ], "volume": [ { "metric": "pages", "count": 65 } ], "citation-file-link": "https://github.com/malamatenia/Eutyches/blob/main/CITATION.cff", "transcription-guidelines": "Graphematic transcription, following the guidelines of CREMMA-medieval. Spacing has been reestablished when dealing with semicontinua, s for long s, loyal to the manuscript for capital letters, abbreviations preserved, punctuation reduced to \";\" and \".\". The few Greek passages have also been preserved, and some of the essais de plume as well (when forming full words). Annotation of the layout made with SegmOnto controlled vocabulary.", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Vlachou-Efstathiou, Malamatenia},\ntitle = {Eutyches \"de uerbo\" glossed}\n}\n", "_apa": "Vlachou-Efstathiou M. Eutyches \"de uerbo\" glossed Compiled from several 11th-century manuscripts of the Decretum Burchardi, it supports the ongoing edition project Burchards Dekret Digital. Annotations are tailored to project-specific needs but can be adapted for other use cases. 
The data was first prepared using Transkribus and then remasked in eScriptorium for usage in Kraken.", "project-name": "Burchards Dekret Digital", "project-website": "https://www.adwmainz.de/projekte/burchards-dekret-digital/informationen.html", "language": [ "lat" ], "production-software": "eScriptorium + Kraken + Transkribus", "automatically-aligned": false, "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1000", "notAfter": "1199" }, "hands": { "count": "unknown", "precision": "exact" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Page-XML", "volume": [ { "metric": "pages", "count": 3000 } ], "_pid": "218580cef" }, "7e15a5255": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Shakespeare-Scott translations", "url": "https://github.com/millawell/ocr-data", "project-name": "Publishing an OCR ground truth data set for reuse in an unclear copyright setting", "project-website": "https://github.com/millawell/ocr-data", "authors": [ { "name": "Lassner", "surname": "David" }, { "name": "Coburger", "surname": "Julius" }, { "name": "Neudecker", "surname": "Clemens" }, { "name": "Baillot", "surname": "Anne" } ], "description": "Ground truth data in German and English of Shakespeare and Scott prints in original and different translations. 
\n", "language": [ "eng", "deu" ], "script": [ { "iso": "Latn" }, { "iso": "Latf" } ], "script-type": "only-typed", "time": { "notBefore": "1815", "notAfter": "1852" }, "hands": { "count": "unknown", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "lines", "count": 5354 }, { "metric": "files", "count": 131 }, { "metric": "regions", "count": 131 }, { "metric": "characters", "count": 192264 } ], "sources": [ { "reference": "", "link": "https://zfdg.de/sb005_006" } ], "citation-file-link": "https://github.com/millawell/ocr-data/blob/master/citation.cff", "production-software": "eScriptorium + Kraken", "automatically-aligned": false, "_pid": "7e15a5255" }, "da9453ee0": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Paris Bible Project (PBP)", "url": "https://github.com/parisbible/ground_truth", "authors": [ { "name": "Estelle", "surname": "Gu\u00e9ville", "orcid": "0000-0003-2603-1051", "roles": [ "transcriber", "aligner", "project-manager", "quality-control" ] }, { "name": "David", "surname": "Wrisley", "orcid": "0000-0002-0355-1487", "roles": [ "transcriber", "aligner", "project-manager", "quality-control" ] }, { "name": "Niccol\u00f2 Acram", "surname": "Cappelletto", "roles": [ "transcriber", "aligner", "quality-control" ] } ], "institutions": [], "description": "The Paris Bible Project aims to understand the production and diffusion of medieval Latin Bibles in Europe. The dataset includes ground truth from Paris Bibles produced in the 13th and 14th centuries. 
We also provide the most recent version of our list of Paris Bible manuscripts found in the world along with information about them.", "project-website": "https://parisbible.github.io/", "language": [ "lat" ], "production-software": "Transkribus", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1200", "notAfter": "1399" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "lines", "count": 1700 }, { "metric": "files", "count": 19 }, { "metric": "regions", "count": 40 }, { "metric": "characters", "count": 55970 } ], "characters": { "mode": "NFKD", "members": [ "i", "e", "t", "u", "a", "s", "o", "n", "\u0304", "c", "m", "r", "l", "\ua77a", ".", "p", "b", "q", "\u204a", "g", "f", "\u0301", "\ua75b", "h", "-", "d", "\ua76b", ";", "x", "\ua76f", "\u033e", "\ua751", "\u0365", "E", "\u0315", "\ua75d", "\u0303", "\ua753", "y", "\u0308", "N", "\u0307", "Q", "\u00b7", "D", "S", "I", "A", "\u0366", "C", "T", "\u1506", "\ua759", "H", "F", "P", "\u0363", "2", "V", "M", ":", "R", "z", "L", "O", "U", "v", "\u211f", "G", "\u0368", "\u0367", "&", "\u1e9c", "\u1de4", "\u0364", "\u0280", "B", "X", "\ua758", "?", "k", "\u18f3", "j", "\u036c" ] }, "transcription-guidelines": "See: https://parisbible.github.io/guidelines/", "automatically-aligned": false, "_bibtex": "@misc{YourReferenceHere,\nauthor = {Gu\u00e9ville, Estelle and Wrisley, David Joseph},\ndoi = {10.5281/zenodo.7653691},\nmonth = {10},\ntitle = {Ground Truth Used in HTR for the Paris Bible Project},\nyear = {2021}\n}\n", "_apa": "Gu\u00e9ville E., Wrisley D.J. (2021). Ground Truth Used in HTR for the Paris Bible Project (version 1.0.0). DOI: 10.5281/zenodo.7653691 The source is the extensive correspondence of Swiss reformer Heinrich Bullinger (1504-1575) and his over 800 different correspondents. 
It therefore contains great variety in handwriting styles. Furthermore, it is multilingual since there are Latin and Early New High German (and sometimes mixed) letters. The data is split into Latin and Early New High German (determined with langid) and put into separate folders (de for Early New High German and la for Latin).", "project-website": "https://www.bullinger-digital.ch/", "language": [ "lat", "deu" ], "production-software": "Transkribus, own", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1523", "notAfter": "1575" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" }, "format": "Image-Text-Pairs", "volume": [ { "metric": "lines", "count": 165673 } ], "automatically-aligned": true, "transcription-guidelines": "Automated transcript alignment with Transkribus", "_pid": "ff2af829a" }, "35f73e074": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Caroline Minuscule by Rescribe", "url": "https://github.com/rescribe/carolineminuscule-groundtruth", "project-name": "Rescribe\n", "project-website": "https://rescribe.xyz/", "authors": [ { "name": "White", "surname": "Nick", "roles": [ "transcriber", "project-manager" ] }, { "name": "Cl\u00e9rice", "surname": "Thibault", "roles": [ "aligner" ] }, { "name": "Karaisl", "surname": "Antonia", "roles": [ "transcriber", "project-manager" ] } ], "description": "This ground truth repository is a work in progress; it currently accounts for a part of our complete Caroline Minuscule training pool of around 70 manuscripts used for our OCRopus Caroline Minuscule model (see ocropus-models repository).\n", "language": [ "lat" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "800", "notAfter": "1199" }, "hands": { "count": "1-per-file", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": 
"https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 17155 }, { "metric": "files", "count": 17 }, { "metric": "lines", "count": 457 }, { "metric": "regions", "count": 46 } ], "transcription-guidelines": "In general this meant deciding between diplomatic transcription (i.e. sticking to what it says on the page) and gently modernized features (i.e. reinterpreting medieval signs into modern equivalents) with a view to specific categories. Read on for a summary of the rules and the respective rationale behind them.\nSUMMARY\nPUNCTUATION\n\n Modern: medieval punctuation is transcribed with modern equivalents; punctus elevatus transcribed as semicolon\n\nCAPITALIZATION\n\n Diplomatic: Original capitalization retained\n\nABBREVIATIONS\n\n Diplomatic where possible: Retain abbreviations and render glyphs as opposed to expanded versions where possible\n \"*\" where original character isn't served: OCRopus (at the point in time of transcription) could not handle some of the medieval glyphs, even where a Unicode version was present. Abbreviations not in OCRopus are uniformly transcribed as \"*\", in the case of a combined character (such as a consonant with a macron) as the base character followed by \"*\" (e.g. \"t*\"). The list of accepted characters in OCRopus can be found in this repository, and downloaded and used as codec in the OCRopus training process.\n\nSPACING\n\n Diplomatic: Preserve manuscript spacing, i.e. 
give diplomatic transcription\n\nNUMBERS\n\n Diplomatic: retain original version of both Roman and Arabic numerals", "characters": { "mode": "NFD", "members": [ "i", "e", "t", "u", "a", "s", "n", "o", "r", "m", "c", "d", "l", "p", ".", "b", "q", "g", "*", "h", ";", "\u0303", "f", "x", "I", "\u0304", "E", "N", "\u0328", ":", "&", "S", "\ua751", "C", "A", "\u0111", "D", "U", "T", "\ua753", "Q", "v", ",", "O", "R", "P", "L", "M", "\u00e6", "H", "F", "?", "1", "y", "\ua75d", "\ua759", "V", "4", "B", "z", "5", "X", "6", "\ua75b", "/", "'", "0", "2", "9", "K", "-" ] }, "production-software": "Unknown [Automatically filled]", "automatically-aligned": false, "_pid": "35f73e074" }, "84ee5d128": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "\u00c9diter la correspondance de Constance de Salm (1767-1845)", "url": "https://github.com/sbiay/CdS-edition/tree/main/htr/verite-terrain", "authors": [ { "name": "Biay", "surname": "S\u00e9bastien", "roles": [ "transcriber" ] } ], "institutions": [], "description": "La correspondance de Constance de Salm (femme de lettres fran\u00e7aise) comprend diff\u00e9rents sp\u00e9cimens d\u2019\u00e9criture du d\u00e9but du XIXe si\u00e8cle. Le jeu de donn\u00e9es atteste les mains de quatre copistes diff\u00e9rents.", "project-website": "https://dhiha.hypotheses.org/2945", "language": [ "fra" ], "production-software": "eScriptorium + Kraken", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1800", "notAfter": "1825" }, "hands": { "count": "less-than-11", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "sources": [ { "reference": "Salm, C. de (1767-1845). Correspondance. Soci\u00e9t\u00e9 des Amis du Vieux Toulon et de sa R\u00e9gion, Fonds Salm. 
Archiv Schloss Dyck, fonds Constance de Salm.", "link": "" } ], "volume": [ { "metric": "lines", "count": 1754 } ], "transcription-guidelines": "Usages scribaux respect\u00e9s : abr\u00e9viations, fautes, accentuation respect\u00e9s. Allographes normalis\u00e9s (s long).", "automatically-aligned": false, "_pid": "84ee5d128" }, "c2cf58d8f": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "EpiSearch HTR", "url": "https://github.com/vedph/episearch-htr", "authors": [ { "name": "Lorenzo", "surname": "Calvelli", "orcid": "0000-0002-0920-9156", "roles": [ "project-manager" ] }, { "name": "Tatiana", "surname": "Tommasi", "orcid": "0009-0000-2815-0113", "roles": [ "transcriber" ] }, { "name": "Federico", "surname": "Boschetti", "orcid": "0000-0002-7810-7735", "roles": [ "support" ] } ], "institutions": [], "description": "Ground Truth for Astori\u2019s letters (see the README.md file for details)", "project-name": "EpiSearch", "project-website": "https://github.com/vedph/episearch-htr", "language": [ "ita" ], "production-software": "eScriptorium + Kraken", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1705", "notAfter": "1709" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "files", "count": 34 } ], "automatically-aligned": false, "_pid": "c2cf58d8f" }, "12401acca": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "HPGTR Dataset", "url": "https://github.com/vivianpl/hpgtr", "authors": [ { "name": "Paraskevi", "surname": "Platanou", "roles": [ "transcriber", "project-manager" ] }, { "name": "John", "surname": "Pavlopoulos", "orcid": "0000-0001-9188-7425", "roles": [ "transcriber", "project-manager" ] }, { "name": "Georgios", "surname": "Papaioannou", "orcid": "0000-0003-4774-0746", "roles": [ "transcriber", 
"project-manager" ] } ], "institutions": [], "description": "The HPGT dataset consists of images of Handwritten Paleographic Greek Text, derived from the Bodleian Libraries' Greek manuscript collection, specifically the Barocci collection, which dates from the 8th to the 17th centuries. This dataset is divided into two editions: HPGTR.N, which contains 77 unsegmented images categorized by century from the 10th to the 16th, and HPGTR.S, which features carefully segmented lines from selected images to facilitate machine learning tasks. The dataset captures a range of characteristics, including variations in writing style, page conditions, and manuscript production details.\nThis dataset is part of the following work: Paraskevi Platanou, John Pavlopoulos, and Georgios Papaioannou. 2022. Handwritten Paleographic Greek Text Recognition: A Century-Based Approach. In *Proceedings of the \"Thirteenth Language Resources and Evaluation Conference\"*, pages 6585\u20136589, Marseille, France. European Language Resources Association.", "language": [ "grc" ], "transcription-guidelines": "- Abbreviation and ligatures were resolved\n- Minuscule in the beginning of sentences were kept as such.\n- Polytonic spelling and diaeresis are kept\n", "production-software": "Unknown", "automatically-aligned": false, "characters": { "mode": "NFD" }, "script": [ { "iso": "Grek" } ], "script-type": "only-manuscript", "time": { "notBefore": "0901", "notAfter": "1600" }, "hands": { "count": "less-than-11", "precision": "exact" }, "license": { "name": "CC-BY-NC-SA 3.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Page-XML", "volume": [ { "count": 1698, "metric": "lines" }, { "count": 70, "metric": "files" }, { "count": 178, "metric": "regions" }, { "count": 64952, "metric": "characters" } ], "_pid": "12401acca" }, "548220461": { "authors": [ { "name": "Maxime", "orcid": "0009-0006-2076-1220", "roles": [ "transcriber", "aligner", "quality-control" ], "surname": 
"Gu\u00e9nette" }, { "name": "Mathilde", "orcid": "0000-0003-1642-8610", "roles": [ "transcriber", "aligner", "quality-control" ], "surname": "Verstraete" }, { "name": "Alix", "orcid": "0000-0002-0136-4434", "roles": [ "quality-control", "support" ], "surname": "Chagu\u00e9" }, { "name": "Marcello", "orcid": "0000-0001-6424-3229", "roles": [ "project-manager" ], "surname": "Vitali-Rosati" } ], "automatically-aligned": false, "characters": { "members": [ "\u03b1", "\u03b9", "\u0301", "\u03bf", "\u03b5", "\u03bd", "\u03c3", "\u03c4", "\u0313", "\u03c5", "\u03c1", "\u00b7", "\u03ba", "\u03bb", "\u03b7", "\u0300", "\u03c0", "\u03bc", "\u03b4", "\u03c9", "\u0342", "\u03b8", "\u03b3", "\u0314", "\u03c7", "\u03c6", ":", "\u03b2", "\u1fbd", "\u22c7", "\u205b", "\u03be", "\u0308", "~", "\u03b6", "\u03c8", "\u203b", "\u223b", "\u0373" ], "mode": "NFD" }, "description": "Ground Truth dataset for the Codex palatinus graecus 23 (Palatine Anthology), byzantine writing from the X^th^ century. ", "format": "Alto-XML", "hands": { "count": "less-than-11", "precision": "estimated" }, "institutions": [], "language": [ "grc" ], "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "production-software": "eScriptorium + Kraken", "project-website": "https://anthologiagraeca.org/", "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "script": [ { "iso": "Grek", "qualify": "byzantine" } ], "script-type": "only-manuscript", "sources": [ { "link": "https://doi.org/10.11588/diglit.3449", "reference": "Cod. Pal. graec. 23 (10e s. av., Constantinople). Universit\u00e4tsbibliothek Heidelberg, Germany." } ], "time": { "notAfter": "1000", "notBefore": "900" }, "title": "Ground truth for the Palatine Anthology (HTR_CPgr23)", "transcription-guidelines": "we do not resolve the abbreviation, except when they are non ambiguous. 
Full guidelines available here https://gitlab.huma-num.fr/ecrinum/anthologia/htr_cpgr23", "url": "https://gitlab.huma-num.fr/ecrinum/anthologia/htr_cpgr23", "volume": [ { "count": 114273, "metric": "characters" }, { "count": 70, "metric": "files" }, { "count": 3374, "metric": "lines" }, { "count": 50, "metric": "pages" }, { "count": 574, "metric": "regions" } ], "_pid": "548220461" }, "afc133c30": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "La Correspondances Jacques Doucet - Ren\u00e9 Jean", "url": "https://gitlab.inha.fr/snr/LaCorrespondanceDoucetReneJean", "authors": [ { "name": "Cugy", "surname": "Pascale", "roles": [ "transcriber", "project-manager", "quality-control" ] }, { "name": "Fieschi", "surname": "Caroline", "roles": [ "project-manager", "quality-control" ] }, { "name": "Peyrard", "surname": "Alix", "roles": [ "transcriber", "quality-control" ] }, { "name": "Prohin", "surname": "Lucie", "roles": [ "transcriber", "quality-control" ] }, { "name": "Sarda", "surname": "Marie-Anne", "roles": [ "support" ] } ], "institutions": [ { "name": "Institut National de l'histoire de l'art (INHA)", "roles": [ "transcriber", "project-manager", "quality-control" ] }, { "name": "Biblioth\u00e8que nationale de France", "roles": [ "digitization" ] } ], "description": "Projet entrepris dans le cadre du programme La Biblioth\u00e8que d\u2019art et d\u2019arch\u00e9ologie de Jacques Doucet : corpus, savoirs et r\u00e9seaux de l\u2019Institut national d\u2019histoire de l\u2019art \u00e0 partir d\u2019un corpus de lettres et documents conserv\u00e9s au D\u00e9partement des manuscrits de la Biblioth\u00e8que nationale de France sous la cote NAF 13124, une des principales sources sur la relation entre Doucet et Ren\u00e9 Jean qu\u2019il engagea comme biblioth\u00e9caire le 2 juin 1908.", "project-name": "PENSE@INHA", "project-website": "https://skylab.inha.fr/PENSE/LettresDeJacquesDoucetAReneJean1908-1929/", "language": [ "fra" ], 
"production-software": "Transkribus", "script": [ { "iso": "Latn" } ], "script-type": "mainly-manuscript", "time": { "notBefore": "1908", "notAfter": "1929" }, "hands": { "count": "less-than-11", "precision": "exact" }, "license": [ { "name": "Etalab OL 2.0", "url": "https://spdx.org/licenses/etalab-2.0.html" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 83312 }, { "metric": "lines", "count": 2987 }, { "metric": "pages", "count": 200 }, { "metric": "files", "count": 200 } ], "automatically-aligned": false, "_pid": "afc133c30" }, "98985e96b": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Les Papiers Barye", "url": "https://gitlab.inha.fr/snr/LesPapiersBarye", "authors": [ { "name": "Claass", "surname": "Victor", "roles": [ "transcriber", "project-manager", "quality-control" ] }, { "name": "Gain", "surname": "Justine", "roles": [ "transcriber", "quality-control" ] }, { "name": "Martin-Vigier", "surname": "Suzanne", "roles": [ "transcriber", "quality-control" ] } ], "institutions": [ { "name": "Institut National de l'histoire de l'art (INHA)", "roles": [ "transcriber", "aligner", "project-manager", "quality-control", "digitization" ] } ], "description": "Ensemble de documents autour du sculpteur Antoine-Louis Barye. Paris, Biblioth\u00e8que de l\u2019Institut national d\u2019histoire de l\u2019art, collections Jacques Doucet, Archives 166. Institut National de l\u2019Histoire de l\u2019art (INHA) / Set of documents about the sculptor Antoine-Louis Barye. Paris, Library of the Institut national d'histoire de l'art, Jacques Doucet, Archives 166. 
National Institute of Art History (INHA)", "project-name": "PENSE@INHA", "project-website": "https://skylab.inha.fr/PENSE/LesPapiersBarye/", "language": [ "fra" ], "production-software": "Transkribus", "script": [ { "iso": "Latn" } ], "script-type": "mainly-manuscript", "time": { "notBefore": "1819", "notAfter": "1914" }, "hands": { "count": "more-than-10", "precision": "exact" }, "license": [ { "name": "Etalab OL 2.0", "url": "https://spdx.org/licenses/etalab-2.0.html" } ], "format": "Alto-XML", "volume": [ { "metric": "characters", "count": 362629 }, { "metric": "lines", "count": 17880 }, { "metric": "pages", "count": 918 }, { "metric": "files", "count": 918 } ], "automatically-aligned": false, "_pid": "98985e96b" }, "1dd38d4a3": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Ground truth for Neue Z\u00fcrcher Zeitung black letter period", "url": "https://zenodo.org/record/3333627#.YhN1G1vMLUQ", "project-name": "impresso\n", "project-website": "https://impresso-project.ch/", "authors": [ { "name": "Str\u00f6bel", "surname": "Phillip Benjamin", "roles": [ "transcriber", "aligner", "project-manager", "quality-control", "support" ] }, { "name": "Clematide", "surname": "Simon", "roles": [ "transcriber", "quality-control" ] }, { "name": "Watter", "surname": "Camille", "roles": [ "transcriber" ] }, { "name": "Meraner", "surname": "Isabell", "roles": [ "transcriber" ] } ], "description": "The Neue Z\u00fcrcher Zeitung (NZZ) has been publishing in black letter from its very first issue in 1780 until 1947. From this time period, we randomly sampled one frontpage per year, resulting in a total of 167 pages. We chose frontpages because they typically contain highly relevant material and because we want to make sure not to sample pages containing exclusively advertisements or stock information. During certain periods, the NZZ was published several times a day, and there were supplements, too. 
Due to incomplete metadata, the sampling included frontpages from supplements. We then manually corrected the pages, so it can be used as a ground truth to improve the OCR of black letter in historical newspapers.\n", "language": [ "deu" ], "script": [ { "iso": "Latn" } ], "script-type": "only-typed", "time": { "notBefore": "1780", "notAfter": "1946" }, "hands": { "count": "less-than-11", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "count": 43173, "metric": "lines" }, { "count": 167, "metric": "files" }, { "count": 6318, "metric": "regions" }, { "count": 1768146, "metric": "characters" } ], "production-software": "Transkribus", "automatically-aligned": false, "_bibtex": "@dataset{phillip_strobel_2019_3333627,\n author = {Phillip Str\u00f6bel and\n Simon Clematide},\n title = {{Ground truth for Neue Z\u00fcrcher Zeitung black letter \n period}},\n month = jul,\n year = 2019,\n publisher = {Zenodo},\n version = {v1.0},\n doi = {10.5281/zenodo.3333627},\n url = {https://doi.org/10.5281/zenodo.3333627}\n}", "_pid": "1dd38d4a3" }, "d136cb2a7": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Gwalther Handwriting Ground Truth", "url": "https://zenodo.org/record/4780947#.YhN5pVvMLUQ", "project-name": "Bullinger digital\n", "project-website": "https://www.bullinger-digital.ch/", "authors": [ { "name": "Str\u00f6bel", "surname": "Phillip Benjamin", "roles": [ "aligner", "quality-control", "support" ] }, { "name": "Stotz", "surname": "Peter", "roles": [ "transcriber" ] } ], "description": "This is ground truth for Rudolph Gwalther\u2019s (1519-1586) handwriting taken from his book \"Lateinische Gedichte\", where he accumulated writings between 1540 and 1580. 
Data collection and ground truth creation: At the time we collected the data, we found 150 images with corresponding transcriptions by Peter Stotz on e-manuscripta (reference: Gwalther, Rudolf: Lateinische Gedichte. Z\u00fcrich, 1540-1580. Zentralbibliothek Z\u00fcrich, Ms D 152, https://doi.org/10.7891/e-manuscripta-26750 / Public Domain Mark). We removed 8 images with too many corrections or vertical texts. Next, we uploaded the images into the Transkribus platform, applied the line recognition tool and manually copied the transcribed text lines into the recognised line boxes. During this process, we made some corrections, which were mainly due to inconsistencies in punctuation and capitalised letters.\n", "language": [ "lat" ], "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1540", "notAfter": "1580" }, "hands": { "count": "1", "precision": "exact" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Alto-XML", "volume": [ { "count": 4040, "metric": "lines" }, { "count": 142, "metric": "files" }, { "count": 155, "metric": "regions" }, { "count": 144301, "metric": "characters" } ], "production-software": "Transkribus", "automatically-aligned": false, "_bibtex": "@dataset{peter_stotz_2021_4780947,\n author = {Peter Stotz and\n Phillip Str\u00f6bel},\n title = {{bullinger-digital/gwalther-handwriting-ground- \n truth: Initial release}},\n month = may,\n year = 2021,\n publisher = {Zenodo},\n version = {v1.0},\n doi = {10.5281/zenodo.4780947},\n url = {https://doi.org/10.5281/zenodo.4780947}\n}", "_pid": "d136cb2a7" }, "9bb25da33": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "BiblIA", "url": "https://zenodo.org/record/5167263", "project-name": "Scripta PSL\n", "project-website": "https://escripta.hypotheses.org/", "authors": [ { "name": "St\u00f6kl Ben Ezra", "surname": "Daniel", "roles": [ "transcriber", "project-manager" ] }, { 
"name": "Brown-DeVost", "surname": "Bronson" }, { "name": "Jablonski", "surname": "Pawel" }, { "name": "Kiessling", "surname": "Benjamin" }, { "name": "Lolli", "surname": "Elena" }, { "name": "Lapin", "surname": "Hayim" } ], "description": "This dataset for Handwritten Text Recognition includes layout segmentation (regions, toplines and linepolygons) and unicode-transcriptions in alto 4.2 XML for 202 images of Medieval Hebrew manuscripts from the Biblioth\u00e8que nationale de France (BnF, National Library of France) and the Biblioteca Apostolica Vaticana (BAV, Vatican Library) corresponding to the article \"BiblIA - a General Model for Medieval Hebrew Manuscripts and an Open Annotated Dataset\" by Daniel St\u00f6kl Ben Ezra, Bronson Brown-DeVost, Pawel Jablonski, Benjamin Kiessling, Elena Lolli, and Hayim Lapin, published in HIP@ICDAR 2021 held in Lausanne, September 2021.\n", "language": [ "heb" ], "script": [ { "iso": "Hebr" } ], "script-type": "only-manuscript", "time": { "notBefore": "1000", "notAfter": "1499" }, "hands": { "count": "more-than-10", "precision": "exact" }, "license": [ { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "files", "count": 202 }, { "metric": "pages", "count": 202 }, { "metric": "lines", "count": 12461 }, { "metric": "regions", "count": 509 }, { "metric": "characters", "count": 278641 } ], "transcription-guidelines": "See the guidelines detailed in Stoekl Ben Ezra Daniel, Brown-DeVost Bronson, Jablonski Pawel, Lapin Hayim, Kiessling Benjamin, and Lolli Elena. 2021. BiblIA - a General Model for Medieval Hebrew Manuscripts and an Open Annotated Dataset. In The 6th International Workshop on Historical Document Imaging and Processing (HIP '21). Association for Computing Machinery, New York, NY, USA, 61\u201366. 
DOI:https://doi.org/10.1145/3476887.3476896\n", "production-software": "eScriptorium + Kraken", "automatically-aligned": false, "_bibtex": "@dataset{stokl_ben_ezra_2021_5167263,\n author = {St\u00f6kl Ben Ezra, Daniel and\n Brown-DeVost, Bronson and\n Jablonski, Pawel and\n Kiessling, Benjamin and\n Lolli, Elena and\n Lapin, Hayim},\n title = {BiblIA - an Open Annotated Dataset},\n month = aug,\n year = 2021,\n publisher = {Zenodo},\n version = {1.0},\n doi = {10.5281/zenodo.5167263},\n url = {https://doi.org/10.5281/zenodo.5167263}\n}", "_pid": "9bb25da33" }, "3980a7dcd": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "The POPP datasets", "url": "https://zenodo.org/record/6581158", "authors": [ { "name": "Thomas", "surname": "Constum", "roles": [ "aligner", "quality-control", "support" ] }, { "name": "Nicolas", "surname": "Kempf" }, { "name": "Pierrick", "surname": "Tranouez" }, { "name": "Thierry", "surname": "Paquet", "roles": [ "project-manager" ] }, { "name": "Sandra", "surname": "Br\u00e9e", "orcid": "0000-0002-2802-5563", "roles": [ "transcriber", "project-manager" ] }, { "name": "Fran\u00e7ois", "surname": "Merveille", "roles": [ "transcriber" ] } ], "institutions": [], "description": "The POPP datasets are a set of 3 datasets created within the POPP project (Project for the Oceration of the Paris Population Census) for the task of handwriting text recognition. These datasets have been published in \"Recognition and information extraction in historical handwritten tables: toward understanding early 20th century Paris census\" at DAS 2022.\n\nThe 3 datasets are called \u201cGeneric dataset\u201d, \u201cBelleville\u201d, and \u201cChauss\u00e9e d\u2019Antin\u201d and contain lines made from the extracted rows of census tables from 1926. 
Each table in the Paris census contains 30 rows, thus each page in these datasets corresponds to 30 lines.", "project-name": "Project for the Oceration of the Paris Population Census", "project-website": "https://popp.hypotheses.org", "language": [ "fra" ], "production-software": "Pivan", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1926", "notAfter": "1926" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "volume": [ { "metric": "lines", "count": 7050 } ], "transcription-guidelines": "The text is transcribed as in the image (no correction of misspelling, no resolution of abbreviation).\nSince the lines are extracted from table rows, we defined 4 special characters to describe the structure of the text:\n \u00a4 : indicates an empty cell\n / : indicates the separation into columns\n ? : indicates that the content of the cell following this symbol is written above the regular baseline\n ! : indicates that the content of the cell following this symbol is written below the regular baseline\n", "automatically-aligned": false, "_bibtex": "@dataset{constum_2022_6581158,\n author = {CONSTUM, Thomas and\n KEMPF, Nicolas and\n PAQUET, Thierry and\n TRANOUEZ, Pierrick and\n CHATELAIN, Cl\u00e9ment and\n BREE, Sandra and\n MERVEILLE, Fran\u00e7ois},\n title = {{POPP Datasets : Datasets for handwriting \n recognition from French population census}},\n month = may,\n year = 2022,\n publisher = {Zenodo},\n version = {v1.0},\n doi = {10.5281/zenodo.6581158},\n url = {https://doi.org/10.5281/zenodo.6581158}\n}", "_pid": "3980a7dcd" }, "32f975946": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Wien \u00d6NB Cod. 2160 f. 
164-184 Ground Truth from HTR Winter School 2022", "url": "https://zenodo.org/record/7467027#.Y6LRj3bMK3B", "authors": [ { "name": "Geelhaar", "surname": "Tim", "orcid": "0000-0002-7653-5859", "roles": [ "transcriber", "project-manager" ] }, { "name": "D'Amico", "surname": "Sara", "orcid": "0000-0002-8937-2040", "roles": [ "transcriber" ] }, { "name": "Hofmann", "surname": "Lara", "orcid": "0000-0003-4698-3906", "roles": [ "transcriber" ] }, { "name": "Gnasso", "surname": "Alessandro", "orcid": "0000-0001-5964-2989", "roles": [ "transcriber" ] }, { "name": "Audebrand", "surname": "Justine", "roles": [ "transcriber" ] }, { "name": "Stitts", "surname": "Jeremy", "orcid": "0000-0001-6988-1836", "roles": [ "transcriber" ] }, { "name": "Sweeney", "surname": "Mary", "orcid": "0000-0001-7028-2072", "roles": [ "transcriber" ] }, { "name": "Atwood", "surname": "Grace", "orcid": "0000-0002-1546-6546", "roles": [ "transcriber" ] } ], "institutions": [], "description": "This is Ground Truth data created during the HTR Winter School 2022 for the Cod. 2160 \u00d6NB that contains one version of the so-called Lex Dei. ", "project-name": "HTR Winter School 2022, Vienna", "language": [ "lat" ], "production-software": "Transkribus", "script": [ { "iso": "Latn", "qualify": "Carolingian Minuscule" } ], "script-type": "only-manuscript", "time": { "notBefore": "850", "notAfter": "900" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Alto-XML", "sources": [ { "reference": "", "link": "http://data.onb.ac.at/rec/AC13956457" } ], "volume": [ { "metric": "pages", "count": 40 } ], "transcription-guidelines": "Abbreviations resolved, but no normalization and no correcting of misspelling. 
No transcription of initials and interlinear script.", "automatically-aligned": false, "_bibtex": "@dataset{attwood_2022_7467027,\n author = {Attwood and\n Sweeney and\n Stitts and\n Audebrand and\n D'Amico and\n Geelhaar and\n Hofmann and\n Gnasso},\n title = {{Wien \u00d6NB Cod. 2160 f. 164-184 Ground Truth from \n HTR Winter School 2022}},\n month = dec,\n year = 2022,\n publisher = {Zenodo},\n doi = {10.5281/zenodo.7467027},\n url = {https://doi.org/10.5281/zenodo.7467027}\n}", "_pid": "32f975946" }, "43573de7e": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Pade\u0159ov-Bible-handwriting-ground-truth", "url": "https://zenodo.org/record/7467034#.Y6LQZBWZM2w", "authors": [ { "name": "Anna", "surname": "Michalcov\u00e1", "orcid": "0000-0003-4760-6950", "roles": [ "transcriber", "aligner", "project-manager", "quality-control", "support" ] }, { "name": "Jan", "surname": "Odstr\u010dil\u00edk", "orcid": "0000-0001-9104-9827", "roles": [ "project-manager", "support" ] }, { "name": "Laura", "surname": "Maniakov\u00e1", "roles": [ "transcriber" ] }, { "name": "Eli\u0161ka", "surname": "P\u011bnkavov\u00e1", "orcid": "0000-0002-5494-8847" }, { "name": "Kamil", "surname": "Bazelides", "orcid": "0000-0002-5199-8726" }, { "name": "Jan", "surname": "Haji\u010d", "orcid": "0000-0002-9207-567X" }, { "name": "Hana", "surname": "Kreisingerov\u00e1", "orcid": "0000-0002-2924-598X" }, { "name": "Jitka", "surname": "Filipov\u00e1", "orcid": "0000-0002-3570-4038" }, { "name": "Chi-hung", "surname": "Liu" }, { "name": "Martina", "surname": "Dvo\u0159\u00e1kov\u00e1" } ], "institutions": [ { "name": "Institute of the Czech Language" }, { "name": "Masaryk Institute and Archives" } ], "description": "This is ground truth based on the Pade\u0159ov Bible (Vienna, Austrian National Library, shelfmark Cod. 1175, 1432\u20131435), the bible of the third redaction of the Old Czech Bible translation. 
The transcription rules were based on semi-diplomatic transcription rules set by PERO OCR and Sm\u011brnice pro vyd\u00e1v\u00e1n\u00ed star\u0161\u00edch \u010desk\u00fdch text\u016f set by Ji\u0159\u00ed Da\u0148helka (https://vokabular.ujc.cas.cz/moduly/edicnipoznamka.aspx?id=DanhelkaSmernice). Abbreviations were tagged and expanded.", "project-name": "HTR Winter School 2022, Vienna", "project-website": "https://www.oeaw.ac.at/imafo/veranstaltungen/detail/introduction-into-handwritten-text-recognition-1", "language": [ "ces" ], "production-software": "Transkribus", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1432", "notAfter": "1435" }, "hands": { "count": "1", "precision": "exact" }, "license": [ { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" } ], "format": "Page-XML", "sources": [ { "reference": "", "link": "https://search.onb.ac.at/primo-explore/fulldisplay?docid=ONB_alma21302405460003338&context=L&adaptor=Local%20Search%20Engine&vid=ONB&lang=de_DE&search_scope=ONB_gesamtbestand&tab=default_tab&query=addsrcrid,exact,AC13954505" } ], "volume": [ { "metric": "pages", "count": 63 } ], "transcription-guidelines": "Transliteration. Differentiates long and short \"s\". Abbreviations tagged and expanded. 
No misspelling corrections.", "automatically-aligned": false, "_bibtex": "@dataset{michalcova_2022_7467034,\n author = {Michalcov\u00e1, Anna and\n Bazelides, Kamil and\n Haji\u010d, Jan and\n P\u011bnkavov\u00e1, Eli\u0161ka and\n Maniakov\u00e1, Laura and\n Kreisingerov\u00e1, Hana and\n Filipov\u00e1, Jitka and\n Chi-hung Lu and\n Dvo\u0159\u00e1kov\u00e1, Martina},\n title = {{Pade\u0159ov-Bible-handwriting-ground-truth: Initial \n release}},\n month = dec,\n year = 2022,\n publisher = {Zenodo},\n doi = {10.5281/zenodo.7467034},\n url = {https://doi.org/10.5281/zenodo.7467034}\n}", "_pid": "43573de7e" }, "96b246dfa": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Belfort", "url": "https://zenodo.org/record/8041668", "authors": [ { "name": "Sol\u00e8ne", "surname": "Tarride", "orcid": "0000-0001-6174-9865" }, { "name": "Tristan", "surname": "Faine" }, { "name": "M\u00e9lodie", "surname": "Boillet", "orcid": "0000-0002-0618-7852" }, { "name": "Harold", "surname": "Mouch\u00e8re", "orcid": "0000-0001-6220-7216" }, { "name": "Christopher", "surname": "Kermorvant", "orcid": "0000-0002-7508-4080" } ], "institutions": [], "description": "This dataset includes minutes of Belfort municipal council drawn up between 1790 and 1946. Documents include deliberations, lists of councillors, convocations, and agendas. The dataset includes 24,105 text-line images that were automatically detected from pages. 
\nUp to four transcriptions are available for each line image: \n* two from human annotators (in `Transcriptions/callico_1/` and `Transcriptions/callico_2/`)\n* two from automatic models (in `Transcriptions/dan/` and `Transcriptions/pylaia/`) \n", "project-name": "Handwritten Text Recognition from Crowdsourced Annotations", "project-website": "https://arxiv.org/abs/2306.10878", "language": [ "fra" ], "production-software": "Callico", "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1790", "notAfter": "1946" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Image-Text-Pairs", "sources": [ { "reference": "Sol\u00e8ne Tarride, Tristan Faine, M\u00e9lodie Boillet, Harold Mouch\u00e8re, & Christopher Kermorvant. (2023). The Belfort dataset: Handwritten Text Recognition from Crowdsourced Annotations [Data set]. 7th International Workshop on Historical Document Imaging and Processing (HIP'23), San Jos\u00e9, California, USA. Zenodo. 
https://doi.org/10.5281/zenodo.8041668", "link": "https://arxiv.org/abs/2306.10878" } ], "volume": [ { "metric": "lines", "count": 24105 } ], "_bibtex": "@dataset{solene_tarride_2023_8041668,\n author = {Sol\u00e8ne Tarride and\n Tristan Faine and\n M\u00e9lodie Boillet and\n Harold Mouch\u00e8re and\n Christopher Kermorvant},\n title = {{The Belfort dataset: Handwritten Text Recognition \n from Crowdsourced Annotations}},\n month = jun,\n year = 2023,\n publisher = {Zenodo},\n doi = {10.5281/zenodo.8041668},\n url = {https://doi.org/10.5281/zenodo.8041668}\n}", "_pid": "96b246dfa" }, "fc63dcc30": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "EPARCHOS", "url": "https://zenodo.org/records/4095301", "authors": [ { "name": "Aleksandros", "surname": "Papazoglou", "roles": [ "transcriber", "project-manager" ] }, { "name": "Ioannis", "surname": "Pratikakis", "orcid": "0000-0002-4124-3688", "roles": [ "transcriber", "project-manager" ] }, { "name": "Kleopatra", "surname": "Markou", "roles": [ "transcriber", "project-manager" ] }, { "name": "Lazaros", "surname": "Tsochatzidis", "orcid": "0000-0002-4634-7419", "roles": [ "transcriber", "project-manager" ] } ], "institutions": [], "description": "The dataset originates from a Greek handwritten codex that dates from around 1500-1530. This is the subset of the codex British Museum Addit. 6791, written by two hands, one by Antonius Eparchos and the other by Camillos Zanettus (ff. 104r-174v) and delivers texts by Hierocles (In Aureum carmen), Matthaeus Blastares (Collectio alphabetica) and, notably, texts by Michael Psellos (De omnifaria doctrina). The writing delivers the most important abbreviations, logograms and conjunctions, which are cited in virtually every Greek minuscule handwritten codex from the years of the manuscript transliteration and the prevalence of the minuscule script (9th century) to the post-Byzantine years. 
This dataset consists of 120 scanned handwritten text pages, containing 9285 lines of text, 18809 words (6787 unique words). For each page, a PageXML is provided containing the following groundtruth: 1. Text region polygon coordinates 2. Text line polygon coordinates with the corresponding transcription text 3. Word polygon coordinates with the corresponding transcription text", "language": [ "grc" ], "transcription-guidelines": "- Abbreviation and ligatures were resolved\n- Minuscule in the beginning of sentences were kept as such.\n- Polytonic spelling and diaeresis are kept\n", "production-software": "Unknown", "automatically-aligned": false, "characters": { "mode": "NFD" }, "script": [ { "iso": "Grek" } ], "script-type": "only-manuscript", "time": { "notBefore": "1500", "notAfter": "1530" }, "hands": { "count": "less-than-11", "precision": "exact" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Page-XML", "volume": [ { "metric": "lines", "count": 2272 }, { "metric": "characters", "count": 116894 }, { "metric": "files", "count": 120 } ], "_pid": "fc63dcc30" }, "a4fc096c0": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Stavronikita Monastery Collection No. 79", "url": "https://zenodo.org/records/5578136", "authors": [ { "name": "Ioannis", "surname": "Pratikakis", "orcid": "0000-0002-4124-3688", "roles": [ "transcriber", "project-manager" ] }, { "name": "Aleksandros", "surname": "Papazoglou", "roles": [ "transcriber", "project-manager" ] }, { "name": "Symeon", "surname": "Symeonidis", "orcid": "0000-0002-3259-614X", "roles": [ "transcriber", "project-manager" ] }, { "name": "Lazaros", "surname": "Tsochatzidis", "orcid": "0000-0002-4634-7419", "roles": [ "transcriber", "project-manager" ] } ], "institutions": [], "description": "It comprises manuscripts made of paper, written in the 16th century and its dimensions are 220X165 mm. 
The manuscript is embellished with epititles and red initials. Tachygraphical symbols and abbreviations are encountered in the manuscript as well. The dataset of \u03a7\u03a679 consists of 803 lines of text containing 4389 words (2069 unique words) that are distributed over 40 scanned handwritten text pages. For each page, a PageXML is provided containing the following ground-truth: 1. Text region polygon coordinates 2. Text line polygon coordinates with the corresponding transcription text 3. Word polygon coordinates with the corresponding transcription text", "language": [ "grc" ], "transcription-guidelines": "- Abbreviation and ligatures were resolved\n- Minuscule in the beginning of sentences were kept as such.\n- Polytonic spelling and diaeresis are kept\n", "production-software": "Unknown", "automatically-aligned": false, "characters": { "mode": "NFD" }, "script": [ { "iso": "Grek" } ], "script-type": "only-manuscript", "time": { "notBefore": "1501", "notAfter": "1600" }, "hands": { "count": "less-than-11", "precision": "exact" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Page-XML", "volume": [ { "count": 803, "metric": "lines" }, { "count": 40, "metric": "files" }, { "count": 40, "metric": "regions" }, { "count": 29112, "metric": "characters" } ], "_pid": "a4fc096c0" }, "d9283ba57": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Stavronikita Monastery Collection No. 
114", "url": "https://zenodo.org/records/5578251", "authors": [ { "name": "Ioannis", "surname": "Pratikakis", "orcid": "0000-0002-4124-3688", "roles": [ "transcriber", "project-manager" ] }, { "name": "Aleksandros", "surname": "Papazoglou", "roles": [ "transcriber", "project-manager" ] }, { "name": "Symeon", "surname": "Symeonidis", "orcid": "0000-0002-3259-614X", "roles": [ "transcriber", "project-manager" ] }, { "name": "Lazaros", "surname": "Tsochatzidis", "orcid": "0000-0002-4634-7419", "roles": [ "transcriber", "project-manager" ] } ], "institutions": [], "description": "It comprises manuscripts made of paper, written at the end of the 15th century and its dimensions are 218X150 mm. In various pages, we find red initials and epititles which enrich the manuscript\u2019s decoration. \nThe dataset of \u03a7\u03a6114 consists of 1051 lines of text containing 5467 words (2877 unique words) that are distributed over 44 scanned handwritten text pages. \nFor each page, a PageXML is provided containing the following ground-truth:\n1. Text region polygon coordinates 2. Text line polygon coordinates with the corresponding transcription text 3. 
Word polygon coordinates with the corresponding transcription text", "language": [ "grc" ], "transcription-guidelines": "- Abbreviation and ligatures were resolved\n- Minuscule in the beginning of sentences were kept as such.\n- Polytonic spelling and diaeresis are kept\n", "production-software": "Unknown", "automatically-aligned": false, "characters": { "mode": "NFD" }, "script": [ { "iso": "Grek" } ], "script-type": "only-manuscript", "time": { "notBefore": "1401", "notAfter": "1500" }, "hands": { "count": "less-than-11", "precision": "exact" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Page-XML", "volume": [ { "count": 1006, "metric": "lines" }, { "count": 44, "metric": "files" }, { "count": 44, "metric": "regions" }, { "count": 36898, "metric": "characters" } ], "_pid": "d9283ba57" }, "f61ad9fc5": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Stavronikita Monastery Collection No. 53", "url": "https://zenodo.org/records/5595669", "authors": [ { "name": "Ioannis", "surname": "Pratikakis", "orcid": "0000-0002-4124-3688", "roles": [ "transcriber", "project-manager" ] }, { "name": "Aleksandros", "surname": "Papazoglou", "roles": [ "transcriber", "project-manager" ] }, { "name": "Symeon", "surname": "Symeonidis", "orcid": "0000-0002-3259-614X", "roles": [ "transcriber", "project-manager" ] }, { "name": "Lazaros", "surname": "Tsochatzidis", "orcid": "0000-0002-4634-7419", "roles": [ "transcriber", "project-manager" ] } ], "institutions": [], "description": "The collection is one of the oldest Stavronikita Monastery on Mount Athos. It is a parchment, four-gospel manuscript which has been written between 1301 and 1350. It comprises 54 pages with dimensions that are approximately\n 250x185 mm. The script is elegant minuscule and the use of majuscule letters\n is rare. Tachygraphical symbols and abbreviations are encountered in the \n manuscript as well. 
Furthermore, the manuscript is enriched with \n chrysography, elegant epititles and initials. \n\n The dataset of \u03a7\u03a653 consists of 1038 lines of text containing 5592 words\n (2374 unique words) that are distributed over 54 scanned handwritten text pages.", "language": [ "grc" ], "transcription-guidelines": "- Abbreviation and ligatures were resolved\n- Minuscule in the beginning of sentences were kept as such.\n- Polytonic spelling and diaeresis are kept\n", "production-software": "Unknown", "automatically-aligned": false, "characters": { "mode": "NFD" }, "script": [ { "iso": "Grek" } ], "script-type": "only-manuscript", "time": { "notBefore": "1301", "notAfter": "1350" }, "hands": { "count": "less-than-11", "precision": "exact" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Page-XML", "volume": [ { "count": 1038, "metric": "lines" }, { "count": 54, "metric": "files" }, { "count": 54, "metric": "regions" }, { "count": 37070, "metric": "characters" } ], "_pid": "f61ad9fc5" }, "6bd74ee8c": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "Ground-Truthed Data Set of Zenon Papyri for Handwritten Text Recognition", "url": "https://zenodo.org/records/6565706", "authors": [ { "name": "Isabelle", "surname": "Marthot-Santaniello", "orcid": "0000-0003-0407-8748", "roles": [ "transcriber", "project-manager" ] }, { "name": "Hodel", "surname": "Tobias", "orcid": "0000-0002-2071-6407", "roles": [ "transcriber", "project-manager" ] } ], "institutions": [], "description": "Diplomatic transcription of papyri found in the Zenon archive [see en.wikipedia.org/wiki/Zenon_of_Kaunos]\n\nManually prepared as PageXML with Transkribus within D-Scribes project.", "project-name": "D-Scribes", "project-website": "https://d-scribes.philhist.unibas.ch/en/", "language": [ "grc" ], "production-software": "Transkribus", "automatically-aligned": false, "characters": { "mode": "NFD" }, "script": [ { 
"iso": "Grek" } ], "script-type": "only-manuscript", "time": { "notBefore": "-250", "notAfter": "-230" }, "hands": { "count": "unknown", "precision": "estimated" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Page-XML", "volume": [ { "metric": "lines", "count": 321 }, { "metric": "characters", "count": 5850 }, { "metric": "files", "count": 27 } ], "_pid": "6bd74ee8c" }, "b5b1358dc": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "ANR e-NDP Ground Truth", "url": "https://zenodo.org/records/7575693", "authors": [ { "name": "Julie", "surname": "Claustre", "orcid": "0000-0001-8504-3920", "roles": [ "transcriber", "project-manager" ] }, { "name": "Darwin", "surname": "Smith", "roles": [ "transcriber", "project-manager" ] }, { "name": "Sergio", "surname": "Torres Aguilar", "orcid": "0000-0002-1801-3147", "roles": [ "aligner", "quality-control", "support" ] }, { "name": "Isabelle", "surname": "Bretthauer", "orcid": "0000-0002-1780-772X", "roles": [ "transcriber" ] }, { "name": "Pierre", "surname": "Brochard", "orcid": "0000-0003-1955-556X", "roles": [ "quality-control" ] }, { "name": "Olivier", "surname": "Canteaut", "orcid": "0000-0003-4586-1931", "roles": [ "transcriber", "quality-control" ] }, { "name": "Emilie", "surname": "Cottereau", "orcid": "0000-0001-6880-2112", "roles": [ "transcriber" ] }, { "name": "Fabrice", "surname": "Delivr\u00e9", "roles": [ "transcriber" ] }, { "name": "Mathilde", "surname": "Denglos", "roles": [ "transcriber" ] }, { "name": "Vincent", "surname": "Jolivet", "orcid": "0000-0003-0600-0362", "roles": [ "aligner", "quality-control", "support" ] }, { "name": "V\u00e9ronique", "surname": "Julerot", "roles": [ "transcriber" ] }, { "name": "Thierry", "surname": "Kouam\u00e9", "orcid": "0000-0001-9728-2988", "roles": [ "transcriber" ] }, { "name": "Elisabeth", "surname": "Lusset", "orcid": "0000-0003-1572-1890", "roles": [ "transcriber" ] }, { "name": 
"Anne", "surname": "Massoni", "orcid": "0000-0002-1690-9804", "roles": [ "transcriber" ] }, { "name": "Sebastien", "surname": "Nadiras", "roles": [ "transcriber" ] }, { "name": "Nicolas", "surname": "Perreaux", "orcid": "0000-0002-0103-817X", "roles": [ "transcriber" ] }, { "name": "Hugo", "surname": "Regazzi", "orcid": "0000-0002-3059-2874", "roles": [ "transcriber" ] }, { "name": "Mathilde", "surname": "Treglia", "roles": [ "transcriber" ] } ], "institutions": [], "description": "This repository hosts HTR ground truth created within the context of the ANR e-NDP project.\nThis dataset is based on 512 pages from the 26 registers of the Notre-Dame de Paris cathedral chapter.\nThe volumes containing the chapter conclusions were conceived to serve as memorial records, but above all as documents for regular use and consultation in the daily practice of administration and management. \nThe registers were written using a Cursive script (ca. late XIIIe - XVIe) and their content is written mainly in Latin, the rest in French. 
There are no fewer than 18 hands in these pages.\n\n The transcriptions were manually completed in two rounds by a group of 12 contributors, including historians and paleographers, over the course of 2021-2022 using eScriptorium.", "project-name": "ANR e-NDP", "project-website": "https://endp.hypotheses.org/presentation", "language": [ "fra", "lat" ], "production-software": "eScriptorium + Kraken", "automatically-aligned": true, "script": [ { "iso": "Latn", "qualify": "cursive" } ], "script-type": "only-manuscript", "time": { "notBefore": "1326", "notAfter": "1504" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": { "name": "CC-BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/" }, "format": "Page-XML", "volume": [ { "metric": "pages", "count": 512 }, { "metric": "lines", "count": 34231 }, { "metric": "characters", "count": 3320407 }, { "metric": "files", "count": 512 }, { "metric": "regions", "count": 2448 } ], "transcription-guidelines": "- The abbreviations have been resolved, both those by suspension (facim\ua770 ---> facimus) and by contraction (d\u00f1i --> domini). Likewise, those using conventional signs (\u204a --> et ; \ua753 --> pro) have been resolved. \n- The named entities (names of persons, places and institutions) have been capitalized. The beginning of a block of text as well as the original capitals used by the notary are also capitalized.\n- The consonantal i and u characters have been transcribed as j and v in both French and Latin.\n- The punctuation marks used in the text: . and / have been transcribed, but the transcription has not been standardized with modern punctuation.\n- Corrections and words that appear cancelled in the manuscript have been transcribed surrounded by the sign $ at the beginning and at the end.\n- More specific transcription rules can be found in the file `transcription_guidelines.pdf` in the Zenodo repository. 
", "_pid": "b5b1358dc" }, "b00eb4153": { "schema": "https://htr-united.github.io/schema/2023-06-27/schema.json", "title": "ARletta", "url": "zenodo.org/records/11191457", "authors": [ { "name": "Lith", "surname": "Lefranc" }, { "name": "Ilja", "surname": "Van Damme" }, { "name": "Thibault", "surname": "Cl\u00e9rice" }, { "name": "Mike", "surname": "Kestemont" } ], "institutions": [ { "name": "University of Antwerp" }, { "name": "National Institute for Research in Digital Science and Technology, Paris" } ], "description": "Open-source handwritten text recognition models for historic Dutch", "project-name": "Bias in History", "project-website": "https://www.bias-in-history.eu/", "language": [ "nld", "fra" ], "production-software": "eScriptorium + Kraken", "automatically-aligned": false, "script": [ { "iso": "Latn" } ], "script-type": "only-manuscript", "time": { "notBefore": "1600", "notAfter": "1940" }, "hands": { "count": "more-than-10", "precision": "estimated" }, "license": { "name": "CC-BY-SA 4.0", "url": "https://creativecommons.org/licenses/by-sa/4.0/" }, "format": "Page-XML", "volume": [ { "metric": "lines", "count": 431359 }, { "metric": "regions", "count": 44536 }, { "metric": "pages", "count": 10267 }, { "metric": "characters", "count": 14253206 } ], "transcription-guidelines": "**Diplomatic transcription.** All of the text was transcribed verbatim, preserving all of its original features:\n- orthography: preserve original spelling\n- abbreviations: do not expand abbreviations\n- capitalization: retain original use of uppercase and lowercase letters\n- punctuation: transcribe punctuation marks exactly as they appear, even if they are unconventional by modern standards\n- special characters: include any special characters or symbols as they appear\n- formatting: maintain original formatting such as underlining or strikethrough\n- errors and corrections: include all errors and corrections found in the text\n- non-interpretative: avoid interpreting or 
modernizing the text\n- use the '@' symbol for characters you cannot read and tag them as 'unclear' on baseline level\n- tag marginal text as 'marginalia' and main body text as 'paragraph' on region level", "_pid": "b00eb4153" } }