{ "$schema": "http://json-schema.org/draft-07/schema#", "title": "FullPaper", "description": "A full Semantic Scholar paper record with all available fields", "$id": "https://raw.githubusercontent.com/api-evangelist/semantic-scholar/refs/heads/main/json-schema/semantic-scholar-paper.json", "properties": { "paperId": { "type": "string", "description": "Semantic Scholar\u2019s primary unique identifier for a paper.", "example": "5c5751d45e298cea054f32b392c12c61027d2fe7" }, "corpusId": { "type": "integer", "description": "Semantic Scholar\u2019s secondary unique identifier for a paper.", "example": 215416146 }, "externalIds": { "type": "object", "description": "An object that contains the paper\u2019s unique identifiers in external sources. The external sources are limited to: ArXiv, MAG, ACL, PubMed, Medline, PubMedCentral, DBLP, and DOI.", "example": { "MAG": "3015453090", "DBLP": "conf/acl/LoWNKW20", "ACL": "2020.acl-main.447", "DOI": "10.18653/V1/2020.ACL-MAIN.447", "CorpusId": 215416146 } }, "url": { "type": "string", "description": "URL of the paper on the Semantic Scholar website.", "example": "https://www.semanticscholar.org/paper/5c5751d45e298cea054f32b392c12c61027d2fe7" }, "title": { "type": "string", "description": "Title of the paper.", "example": "Construction of the Literature Graph in Semantic Scholar" }, "abstract": { "type": "string", "description": "The paper's abstract. Note that due to legal reasons, this may be missing even if we display an abstract on the website.", "example": "We describe a deployed scalable system for organizing published scientific literature into a heterogeneous graph to facilitate algorithmic manipulation and discovery." 
}, "venue": { "type": "string", "description": "The name of the paper\u2019s publication venue.", "example": "Annual Meeting of the Association for Computational Linguistics" }, "publicationVenue": { "type": "object", "description": "An object that contains the following information about the journal or conference in which this paper was published: id (the venue\u2019s unique ID), name (the venue\u2019s name), type (the type of venue), alternate_names (an array of alternate names for the venue), and url (the venue\u2019s website).", "example": { "id": "1e33b3be-b2ab-46e9-96e8-d4eb4bad6e44", "name": "Annual Meeting of the Association for Computational Linguistics", "type": "conference", "alternate_names": [ "Annu Meet Assoc Comput Linguistics", "Meeting of the Association for Computational Linguistics", "ACL", "Meet Assoc Comput Linguistics" ], "url": "https://www.aclweb.org/anthology/venues/acl/" } }, "year": { "type": "integer", "description": "The year the paper was published.", "example": 1997 }, "referenceCount": { "type": "integer", "description": "The total number of papers this paper references.", "example": 59 }, "citationCount": { "type": "integer", "description": "The total number of papers that reference this paper.", "example": 453 }, "influentialCitationCount": { "type": "integer", "description": "A subset of the citation count, where the cited publication has a significant impact on the citing publication. Determined by Semantic Scholar\u2019s algorithm: https://www.semanticscholar.org/faq#influential-citations.", "example": 90 }, "isOpenAccess": { "type": "boolean", "description": "Whether the paper is open access. 
More information here: https://www.openaccess.nl/en/what-is-open-access.", "example": true }, "openAccessPdf": { "type": "object", "description": "An object that contains the following parameters: url (a link to the paper\u2019s PDF), status (the type of open access https://en.wikipedia.org/wiki/Open_access#Colour_naming_system), the paper's license, and a legal disclaimer.", "example": { "url": "https://www.aclweb.org/anthology/2020.acl-main.447.pdf", "status": "HYBRID", "license": "CCBY", "disclaimer": "Notice: This snippet is extracted from the open access paper or abstract available at https://aclanthology.org/2020.acl-main.447, which is subject to the license by the author or copyright owner provided with this content. Please go to the source to verify the license and copyright information for your use." } }, "fieldsOfStudy": { "type": "array", "description": "A list of the paper\u2019s high-level academic categories from external sources. The possible fields are: Computer Science, Medicine, Chemistry, Biology, Materials Science, Physics, Geology, Psychology, Art, History, Geography, Sociology, Business, Political Science, Economics, Philosophy, Mathematics, Engineering, Environmental Science, Agricultural and Food Sciences, Education, Law, and Linguistics.", "example": [ "Computer Science" ], "items": { "type": "string" } }, "s2FieldsOfStudy": { "type": "array", "description": "An array of objects. Each object contains the following parameters: category (a field of study. The possible fields are the same as in fieldsOfStudy), and source (specifies whether the category was classified by Semantic Scholar or by an external source. 
More information on how Semantic Scholar classifies papers https://medium.com/ai2-blog/announcing-s2fos-an-open-source-academic-field-of-study-classifier-9d2f641949e5)", "example": [ { "category": "Computer Science", "source": "external" }, { "category": "Computer Science", "source": "s2-fos-model" }, { "category": "Mathematics", "source": "s2-fos-model" } ], "items": { "type": "object" } }, "publicationTypes": { "type": "array", "description": "The type of this publication.", "example": [ "Journal Article", "Review" ], "items": { "type": "string" } }, "publicationDate": { "type": "string", "description": "The date when this paper was published, in YYYY-MM-DD format.", "example": "2024-04-29" }, "journal": { "type": "object", "description": "An object that contains the following parameters, if available: name (the journal name), volume (the journal\u2019s volume number), and pages (the page number range)", "example": { "volume": "40", "pages": "116 - 135", "name": "IETE Technical Review" } }, "citationStyles": { "type": "object", "description": "The BibTeX bibliographical citation of the paper.", "example": { "bibtex": "@['JournalArticle', 'Conference']{Ammar2018ConstructionOT,\n author = {Waleed Ammar and Dirk Groeneveld and Chandra Bhagavatula and Iz Beltagy and Miles Crawford and Doug Downey and Jason Dunkelberger and Ahmed Elgohary and Sergey Feldman and Vu A. Ha and Rodney Michael Kinney and Sebastian Kohlmeier and Kyle Lo and Tyler C. Murray and Hsu-Han Ooi and Matthew E. Peters and Joanna L. 
Power and Sam Skjonsberg and Lucy Lu Wang and Christopher Wilhelm and Zheng Yuan and Madeleine van Zuylen and Oren Etzioni},\n booktitle = {NAACL},\n pages = {84-91},\n title = {Construction of the Literature Graph in Semantic Scholar},\n year = {2018}\n}\n" } }, "authors": { "type": "array", "items": { "title": "Author Info", "allOf": [ { "$ref": "#/definitions/AuthorInPaper" } ] } }, "citations": { "type": "array", "title": "Paper Info", "items": { "$ref": "#/definitions/BasePaper" } }, "references": { "type": "array", "items": { "title": "Paper Info", "allOf": [ { "$ref": "#/definitions/BasePaper" } ] } }, "embedding": { "$ref": "#/definitions/Embedding" }, "tldr": { "$ref": "#/definitions/Tldr" }, "textAvailability": { "type": "string", "description": "fulltext, abstract, or none, based on what we have available for this paper" } }, "type": "object", "definitions": { "Error404": { "properties": { "error": { "type": "string", "description": "Depending on the case, error message may be any of these:\n", "example": "Requested object not found" } }, "type": "object" }, "Error400": { "properties": { "error": { "type": "string", "description": "Depending on the case, error message may be any of these:\n", "example": "Unrecognized or unsupported fields: [author.creditCardNumber, garbage]" } }, "type": "object" }, "FullPaper": { "properties": { "paperId": { "type": "string", "description": "Semantic Scholar\u2019s primary unique identifier for a paper.", "example": "5c5751d45e298cea054f32b392c12c61027d2fe7" }, "corpusId": { "type": "integer", "description": "Semantic Scholar\u2019s secondary unique identifier for a paper.", "example": 215416146 }, "externalIds": { "type": "object", "description": "An object that contains the paper\u2019s unique identifiers in external sources. 
The external sources are limited to: ArXiv, MAG, ACL, PubMed, Medline, PubMedCentral, DBLP, and DOI.", "example": { "MAG": "3015453090", "DBLP": "conf/acl/LoWNKW20", "ACL": "2020.acl-main.447", "DOI": "10.18653/V1/2020.ACL-MAIN.447", "CorpusId": 215416146 } }, "url": { "type": "string", "description": "URL of the paper on the Semantic Scholar website.", "example": "https://www.semanticscholar.org/paper/5c5751d45e298cea054f32b392c12c61027d2fe7" }, "title": { "type": "string", "description": "Title of the paper.", "example": "Construction of the Literature Graph in Semantic Scholar" }, "abstract": { "type": "string", "description": "The paper's abstract. Note that due to legal reasons, this may be missing even if we display an abstract on the website.", "example": "We describe a deployed scalable system for organizing published scientific literature into a heterogeneous graph to facilitate algorithmic manipulation and discovery." }, "venue": { "type": "string", "description": "The name of the paper\u2019s publication venue.", "example": "Annual Meeting of the Association for Computational Linguistics" }, "publicationVenue": { "type": "object", "description": "An object that contains the following information about the journal or conference in which this paper was published: id (the venue\u2019s unique ID), name (the venue\u2019s name), type (the type of venue), alternate_names (an array of alternate names for the venue), and url (the venue\u2019s website).", "example": { "id": "1e33b3be-b2ab-46e9-96e8-d4eb4bad6e44", "name": "Annual Meeting of the Association for Computational Linguistics", "type": "conference", "alternate_names": [ "Annu Meet Assoc Comput Linguistics", "Meeting of the Association for Computational Linguistics", "ACL", "Meet Assoc Comput Linguistics" ], "url": "https://www.aclweb.org/anthology/venues/acl/" } }, "year": { "type": "integer", "description": "The year the paper was published.", "example": 1997 }, "referenceCount": { "type": "integer", 
"description": "The total number of papers this paper references.", "example": 59 }, "citationCount": { "type": "integer", "description": "The total number of papers that reference this paper.", "example": 453 }, "influentialCitationCount": { "type": "integer", "description": "A subset of the citation count, where the cited publication has a significant impact on the citing publication. Determined by Semantic Scholar\u2019s algorithm: https://www.semanticscholar.org/faq#influential-citations.", "example": 90 }, "isOpenAccess": { "type": "boolean", "description": "Whether the paper is open access. More information here: https://www.openaccess.nl/en/what-is-open-access.", "example": true }, "openAccessPdf": { "type": "object", "description": "An object that contains the following parameters: url (a link to the paper\u2019s PDF), status (the type of open access https://en.wikipedia.org/wiki/Open_access#Colour_naming_system), the paper's license, and a legal disclaimer.", "example": { "url": "https://www.aclweb.org/anthology/2020.acl-main.447.pdf", "status": "HYBRID", "license": "CCBY", "disclaimer": "Notice: This snippet is extracted from the open access paper or abstract available at https://aclanthology.org/2020.acl-main.447, which is subject to the license by the author or copyright owner provided with this content. Please go to the source to verify the license and copyright information for your use." } }, "fieldsOfStudy": { "type": "array", "description": "A list of the paper\u2019s high-level academic categories from external sources. 
The possible fields are: Computer Science, Medicine, Chemistry, Biology, Materials Science, Physics, Geology, Psychology, Art, History, Geography, Sociology, Business, Political Science, Economics, Philosophy, Mathematics, Engineering, Environmental Science, Agricultural and Food Sciences, Education, Law, and Linguistics.", "example": [ "Computer Science" ], "items": { "type": "string" } }, "s2FieldsOfStudy": { "type": "array", "description": "An array of objects. Each object contains the following parameters: category (a field of study. The possible fields are the same as in fieldsOfStudy), and source (specifies whether the category was classified by Semantic Scholar or by an external source. More information on how Semantic Scholar classifies papers https://medium.com/ai2-blog/announcing-s2fos-an-open-source-academic-field-of-study-classifier-9d2f641949e5)", "example": [ { "category": "Computer Science", "source": "external" }, { "category": "Computer Science", "source": "s2-fos-model" }, { "category": "Mathematics", "source": "s2-fos-model" } ], "items": { "type": "object" } }, "publicationTypes": { "type": "array", "description": "The type of this publication.", "example": [ "Journal Article", "Review" ], "items": { "type": "string" } }, "publicationDate": { "type": "string", "description": "The date when this paper was published, in YYYY-MM-DD format.", "example": "2024-04-29" }, "journal": { "type": "object", "description": "An object that contains the following parameters, if available: name (the journal name), volume (the journal\u2019s volume number), and pages (the page number range)", "example": { "volume": "40", "pages": "116 - 135", "name": "IETE Technical Review" } }, "citationStyles": { "type": "object", "description": "The BibTeX bibliographical citation of the paper.", "example": { "bibtex": "@['JournalArticle', 'Conference']{Ammar2018ConstructionOT,\n author = {Waleed Ammar and Dirk Groeneveld and Chandra Bhagavatula and Iz Beltagy and Miles 
Crawford and Doug Downey and Jason Dunkelberger and Ahmed Elgohary and Sergey Feldman and Vu A. Ha and Rodney Michael Kinney and Sebastian Kohlmeier and Kyle Lo and Tyler C. Murray and Hsu-Han Ooi and Matthew E. Peters and Joanna L. Power and Sam Skjonsberg and Lucy Lu Wang and Christopher Wilhelm and Zheng Yuan and Madeleine van Zuylen and Oren Etzioni},\n booktitle = {NAACL},\n pages = {84-91},\n title = {Construction of the Literature Graph in Semantic Scholar},\n year = {2018}\n}\n" } }, "authors": { "type": "array", "items": { "title": "Author Info", "allOf": [ { "$ref": "#/definitions/AuthorInPaper" } ] } }, "citations": { "type": "array", "title": "Paper Info", "items": { "$ref": "#/definitions/BasePaper" } }, "references": { "type": "array", "items": { "title": "Paper Info", "allOf": [ { "$ref": "#/definitions/BasePaper" } ] } }, "embedding": { "$ref": "#/definitions/Embedding" }, "tldr": { "$ref": "#/definitions/Tldr" }, "textAvailability": { "type": "string", "description": "fulltext, abstract, or none, based on what we have available for this paper" } }, "type": "object" }, "AuthorInPaper": { "properties": { "authorId": { "type": "string", "title": "This field will be provided if no fields are specified", "description": "Semantic Scholar\u2019s unique ID for the author.", "example": "1741101" }, "externalIds": { "type": "object", "description": "An object that contains the ORCID/DBLP IDs for the author, if known.", "example": { "DBLP": [ 123 ] } }, "url": { "type": "string", "description": "URL of the author on the Semantic Scholar website.", "example": "https://www.semanticscholar.org/author/1741101" }, "name": { "type": "string", "title": "This field will be provided if no fields are specified", "description": "Author\u2019s name.", "example": "Oren Etzioni" }, "affiliations": { "type": "array", "description": "Array of organizational affiliations for the author.", "example": [ "Allen Institute for AI" ], "items": { "type": "string" } }, "homepage": { 
"type": "string", "description": "The author\u2019s homepage.", "example": "https://allenai.org/" }, "paperCount": { "type": "string", "description": "The author's total publications count.", "example": 10 }, "citationCount": { "type": "string", "description": "The author's total citations count.", "example": 50 }, "hIndex": { "type": "string", "description": "The author\u2019s h-index, which is a measure of the productivity and citation impact of the author\u2019s publications: https://www.semanticscholar.org/faq#h-index", "example": 5 }, "normalizedAffiliations": { "type": "array", "description": "Array of organizational ROR-based normalized affiliations for the author (ROR - Research Organization Registry).", "example": [ { "rorId": "https://ror.org/05w520734", "rorDisplayName": "Allen Institute for Artificial Intelligence" } ], "items": { "$ref": "#/definitions/NormalizedAffiliation" } } }, "type": "object" }, "NormalizedAffiliation": { "properties": { "rorId": { "type": "string", "description": "ROR (Research Organization Registry) unique ID.", "example": "https://ror.org/05w520734" }, "rorDisplayName": { "type": "string", "description": "Official ROR display name.", "example": "Allen Institute for Artificial Intelligence" } }, "type": "object" }, "BasePaper": { "properties": { "paperId": { "type": "string", "description": "Semantic Scholar\u2019s primary unique identifier for a paper.", "example": "5c5751d45e298cea054f32b392c12c61027d2fe7" }, "corpusId": { "type": "integer", "description": "Semantic Scholar\u2019s secondary unique identifier for a paper.", "example": 215416146 }, "externalIds": { "type": "object", "description": "An object that contains the paper\u2019s unique identifiers in external sources. 
The external sources are limited to: ArXiv, MAG, ACL, PubMed, Medline, PubMedCentral, DBLP, and DOI.", "example": { "MAG": "3015453090", "DBLP": "conf/acl/LoWNKW20", "ACL": "2020.acl-main.447", "DOI": "10.18653/V1/2020.ACL-MAIN.447", "CorpusId": 215416146 } }, "url": { "type": "string", "description": "URL of the paper on the Semantic Scholar website.", "example": "https://www.semanticscholar.org/paper/5c5751d45e298cea054f32b392c12c61027d2fe7" }, "title": { "type": "string", "description": "Title of the paper.", "example": "Construction of the Literature Graph in Semantic Scholar" }, "abstract": { "type": "string", "description": "The paper's abstract. Note that due to legal reasons, this may be missing even if we display an abstract on the website.", "example": "We describe a deployed scalable system for organizing published scientific literature into a heterogeneous graph to facilitate algorithmic manipulation and discovery." }, "venue": { "type": "string", "description": "The name of the paper\u2019s publication venue.", "example": "Annual Meeting of the Association for Computational Linguistics" }, "publicationVenue": { "type": "object", "description": "An object that contains the following information about the journal or conference in which this paper was published: id (the venue\u2019s unique ID), name (the venue\u2019s name), type (the type of venue), alternate_names (an array of alternate names for the venue), and url (the venue\u2019s website).", "example": { "id": "1e33b3be-b2ab-46e9-96e8-d4eb4bad6e44", "name": "Annual Meeting of the Association for Computational Linguistics", "type": "conference", "alternate_names": [ "Annu Meet Assoc Comput Linguistics", "Meeting of the Association for Computational Linguistics", "ACL", "Meet Assoc Comput Linguistics" ], "url": "https://www.aclweb.org/anthology/venues/acl/" } }, "year": { "type": "integer", "description": "The year the paper was published.", "example": 1997 }, "referenceCount": { "type": "integer", 
"description": "The total number of papers this paper references.", "example": 59 }, "citationCount": { "type": "integer", "description": "The total number of papers that reference this paper.", "example": 453 }, "influentialCitationCount": { "type": "integer", "description": "A subset of the citation count, where the cited publication has a significant impact on the citing publication. Determined by Semantic Scholar\u2019s algorithm: https://www.semanticscholar.org/faq#influential-citations.", "example": 90 }, "isOpenAccess": { "type": "boolean", "description": "Whether the paper is open access. More information here: https://www.openaccess.nl/en/what-is-open-access.", "example": true }, "openAccessPdf": { "type": "object", "description": "An object that contains the following parameters: url (a link to the paper\u2019s PDF), status (the type of open access https://en.wikipedia.org/wiki/Open_access#Colour_naming_system), the paper's license, and a legal disclaimer.", "example": { "url": "https://www.aclweb.org/anthology/2020.acl-main.447.pdf", "status": "HYBRID", "license": "CCBY", "disclaimer": "Notice: This snippet is extracted from the open access paper or abstract available at https://aclanthology.org/2020.acl-main.447, which is subject to the license by the author or copyright owner provided with this content. Please go to the source to verify the license and copyright information for your use." } }, "fieldsOfStudy": { "type": "array", "description": "A list of the paper\u2019s high-level academic categories from external sources. 
The possible fields are: Computer Science, Medicine, Chemistry, Biology, Materials Science, Physics, Geology, Psychology, Art, History, Geography, Sociology, Business, Political Science, Economics, Philosophy, Mathematics, Engineering, Environmental Science, Agricultural and Food Sciences, Education, Law, and Linguistics.", "example": [ "Computer Science" ], "items": { "type": "string" } }, "s2FieldsOfStudy": { "type": "array", "description": "An array of objects. Each object contains the following parameters: category (a field of study. The possible fields are the same as in fieldsOfStudy), and source (specifies whether the category was classified by Semantic Scholar or by an external source. More information on how Semantic Scholar classifies papers https://medium.com/ai2-blog/announcing-s2fos-an-open-source-academic-field-of-study-classifier-9d2f641949e5)", "example": [ { "category": "Computer Science", "source": "external" }, { "category": "Computer Science", "source": "s2-fos-model" }, { "category": "Mathematics", "source": "s2-fos-model" } ], "items": { "type": "object" } }, "publicationTypes": { "type": "array", "description": "The type of this publication.", "example": [ "Journal Article", "Review" ], "items": { "type": "string" } }, "publicationDate": { "type": "string", "description": "The date when this paper was published, in YYYY-MM-DD format.", "example": "2024-04-29" }, "journal": { "type": "object", "description": "An object that contains the following parameters, if available: name (the journal name), volume (the journal\u2019s volume number), and pages (the page number range)", "example": { "volume": "40", "pages": "116 - 135", "name": "IETE Technical Review" } }, "citationStyles": { "type": "object", "description": "The BibTeX bibliographical citation of the paper.", "example": { "bibtex": "@['JournalArticle', 'Conference']{Ammar2018ConstructionOT,\n author = {Waleed Ammar and Dirk Groeneveld and Chandra Bhagavatula and Iz Beltagy and Miles 
Crawford and Doug Downey and Jason Dunkelberger and Ahmed Elgohary and Sergey Feldman and Vu A. Ha and Rodney Michael Kinney and Sebastian Kohlmeier and Kyle Lo and Tyler C. Murray and Hsu-Han Ooi and Matthew E. Peters and Joanna L. Power and Sam Skjonsberg and Lucy Lu Wang and Christopher Wilhelm and Zheng Yuan and Madeleine van Zuylen and Oren Etzioni},\n booktitle = {NAACL},\n pages = {84-91},\n title = {Construction of the Literature Graph in Semantic Scholar},\n year = {2018}\n}\n" } }, "authors": { "type": "array", "title": "Author Info", "description": "Details about the paper's authors", "items": { "$ref": "#/definitions/AuthorInfo" } } }, "type": "object" }, "AuthorInfo": { "properties": { "authorId": { "type": "string", "title": "This field will be provided if no fields are specified", "description": "Semantic Scholar\u2019s unique ID for the author.", "example": "1741101" }, "name": { "type": "string", "title": "This field will be provided if no fields are specified", "description": "Author\u2019s name.", "example": "Oren Etzioni" } }, "type": "object" }, "Embedding": { "properties": { "model": { "type": "string", "description": "The SPECTER vector embedding model version: https://github.com/allenai/specter", "example": "specter@v0.1.1" }, "vector": { "type": "object", "description": "Numerical embedding vector.", "example": [ -8.82082748413086, -2.6610865592956543 ] } }, "type": "object" }, "Tldr": { "properties": { "model": { "type": "string", "description": "The tldr model version number: https://github.com/allenai/scitldr", "example": "tldr@v2.0.0" }, "text": { "type": "string", "description": "The tldr paper summary.", "example": "This paper reduces literature graph construction into familiar NLP tasks, point out research challenges due to differences from standard formulations of these tasks, and report empirical results for each task." 
} }, "type": "object" }, "PaperBatch": { "properties": { "ids": { "type": "array", "items": { "type": "string", "example": "649def34f8be52c8b66281af98ae884c09aef38b" } } }, "type": "object" }, "CitationBatch": { "properties": { "offset": { "type": "integer", "description": "Starting position for this batch." }, "next": { "type": "integer", "description": "Starting position of the next batch. Absent if no more data exists." }, "data": { "type": "array", "items": { "title": "contents of this batch", "allOf": [ { "$ref": "#/definitions/Citation" } ] } } }, "type": "object" }, "Citation": { "properties": { "contexts": { "type": "array", "description": "Array of text snippets where the reference to the paper is mentioned.", "example": [ "SciBERT (Beltagy et al., 2019) follows the BERT\u2019s masking strategy to pre-train the model from scratch using a scientific corpus composed of papers from Semantic Scholar (Ammar et al., 2018).", "27M articles from the Semantic Scholar dataset (Ammar et al., 2018)." ], "items": { "type": "string" } }, "intents": { "type": "array", "description": "Array of citation intents that summarizes how the reference to the paper is mentioned. Possible intents: https://www.semanticscholar.org/faq#citation-intent", "example": [ "methodology" ], "items": { "type": "string" } }, "contextsWithIntent": { "type": "array", "description": "Array of objects that contain both contexts and the intents they are associated with.", "example": [ { "context": "SciBERT (Beltagy et al., 2019) follows the BERT\u2019s ...", "intents": [ "methodology" ] } ], "items": { "type": "object" } }, "isInfluential": { "type": "boolean", "description": "Whether the citing paper is highly influential. 
See more about influential citations: https://www.semanticscholar.org/faq#influential-citations", "example": false }, "citingPaper": { "description": "Details about the citing paper", "allOf": [ { "$ref": "#/definitions/BasePaper" } ] } }, "type": "object" }, "ReferenceBatch": { "properties": { "offset": { "type": "integer", "description": "Starting position for this batch." }, "next": { "type": "integer", "description": "Starting position of the next batch. Absent if no more data exists." }, "data": { "type": "array", "items": { "title": "contents of this batch", "allOf": [ { "$ref": "#/definitions/Reference" } ] } } }, "type": "object" }, "Reference": { "properties": { "contexts": { "type": "array", "description": "Array of text snippets where the reference to the paper is mentioned.", "example": [ "SciBERT (Beltagy et al., 2019) follows the BERT\u2019s masking strategy to pre-train the model from scratch using a scientific corpus composed of papers from Semantic Scholar (Ammar et al., 2018).", "27M articles from the Semantic Scholar dataset (Ammar et al., 2018)." ], "items": { "type": "string" } }, "intents": { "type": "array", "description": "Array of citation intents that summarizes how the reference to the paper is mentioned. Possible intents: https://www.semanticscholar.org/faq#citation-intent", "example": [ "methodology" ], "items": { "type": "string" } }, "contextsWithIntent": { "type": "array", "description": "Array of objects that contain both contexts and the intents they are associated with.", "example": [ { "context": "SciBERT (Beltagy et al., 2019) follows the BERT\u2019s ...", "intents": [ "methodology" ] } ], "items": { "type": "object" } }, "isInfluential": { "type": "boolean", "description": "Whether the citing paper is highly influential. 
See more about influential citations: https://www.semanticscholar.org/faq#influential-citations", "example": false }, "citedPaper": { "description": "Details about the cited paper", "allOf": [ { "$ref": "#/definitions/BasePaper" } ] } }, "type": "object" }, "AuthorBatch": { "properties": { "offset": { "type": "integer", "description": "Starting position for this batch." }, "next": { "type": "integer", "description": "Starting position of the next batch. Absent if no more data exists." }, "data": { "type": "array", "items": { "title": "contents of this batch", "allOf": [ { "$ref": "#/definitions/AuthorInPaperWithPapers" } ] } } }, "type": "object" }, "AuthorInPaperWithPapers": { "properties": { "authorId": { "type": "string", "title": "This field will be provided if no fields are specified", "description": "Semantic Scholar\u2019s unique ID for the author.", "example": "1741101" }, "externalIds": { "type": "object", "description": "An object that contains the ORCID/DBLP IDs for the author, if known.", "example": { "DBLP": [ 123 ] } }, "url": { "type": "string", "description": "URL of the author on the Semantic Scholar website.", "example": "https://www.semanticscholar.org/author/1741101" }, "name": { "type": "string", "title": "This field will be provided if no fields are specified", "description": "Author\u2019s name.", "example": "Oren Etzioni" }, "affiliations": { "type": "array", "description": "Array of organizational affiliations for the author.", "example": [ "Allen Institute for AI" ], "items": { "type": "string" } }, "homepage": { "type": "string", "description": "The author\u2019s homepage.", "example": "https://allenai.org/" }, "paperCount": { "type": "string", "description": "The author's total publications count.", "example": 10 }, "citationCount": { "type": "string", "description": "The author's total citations count.", "example": 50 }, "hIndex": { "type": "string", "description": "The author\u2019s h-index, which is a measure of the productivity and 
citation impact of the author\u2019s publications: https://www.semanticscholar.org/faq#h-index", "example": 5 }, "normalizedAffiliations": { "type": "array", "description": "Array of organizational ROR-based normalized affiliations for the author (ROR - Research Organization Registry).", "example": [ { "rorId": "https://ror.org/05w520734", "rorDisplayName": "Allen Institute for Artificial Intelligence" } ], "items": { "$ref": "#/definitions/NormalizedAffiliation" } }, "papers": { "type": "array", "items": { "title": "Paper Info", "allOf": [ { "$ref": "#/definitions/BasePaper" } ] } } }, "type": "object" }, "PaperRelevanceSearchBatch": { "properties": { "total": { "type": "string", "description": "Approximate number of matching search results.\n\nBecause of the subtleties of finding partial phrase matches in different parts of the document, be cautious about interpreting the total field as a count of documents containing any particular word in the query.", "example": 15117 }, "offset": { "type": "integer", "description": "Starting position for this batch." }, "next": { "type": "integer", "description": "Starting position of the next batch. Absent if no more data exists." }, "data": { "type": "array", "items": { "title": "contents of this batch", "allOf": [ { "$ref": "#/definitions/FullPaper" } ] } } }, "type": "object" }, "PaperBulkSearchBatch": { "properties": { "total": { "type": "string", "description": "Approximate number of matching search results.\n\nBecause of the subtleties of finding partial phrase matches in different parts of the document, be cautious about interpreting the total field as a count of documents containing any particular word in the query.", "example": 15117 }, "token": { "type": "string", "description": "A continuation token that must be provided to fetch the next page of results. 
Present only when more results can be fetched.", "example": "SDKJFHSDKFHWIEFSFSGHEIURYC" }, "data": { "type": "array", "items": { "title": "Contents of this page", "allOf": [ { "$ref": "#/definitions/FullPaper" } ] } } }, "type": "object" }, "PaperMatch": { "properties": { "data": { "type": "array", "items": { "$ref": "#/definitions/Title Match Paper" } } }, "type": "object" }, "Title Match Paper": { "properties": { "matchScore": { "type": "number", "example": 174.2298 }, "paperId": { "type": "string", "description": "Semantic Scholar\u2019s primary unique identifier for a paper.", "example": "5c5751d45e298cea054f32b392c12c61027d2fe7" }, "corpusId": { "type": "integer", "description": "Semantic Scholar\u2019s secondary unique identifier for a paper.", "example": 215416146 }, "externalIds": { "type": "object", "description": "An object that contains the paper\u2019s unique identifiers in external sources. The external sources are limited to: ArXiv, MAG, ACL, PubMed, Medline, PubMedCentral, DBLP, and DOI.", "example": { "MAG": "3015453090", "DBLP": "conf/acl/LoWNKW20", "ACL": "2020.acl-main.447", "DOI": "10.18653/V1/2020.ACL-MAIN.447", "CorpusId": 215416146 } }, "url": { "type": "string", "description": "URL of the paper on the Semantic Scholar website.", "example": "https://www.semanticscholar.org/paper/5c5751d45e298cea054f32b392c12c61027d2fe7" }, "title": { "type": "string", "description": "Title of the paper.", "example": "Construction of the Literature Graph in Semantic Scholar" }, "abstract": { "type": "string", "description": "The paper's abstract. Note that due to legal reasons, this may be missing even if we display an abstract on the website.", "example": "We describe a deployed scalable system for organizing published scientific literature into a heterogeneous graph to facilitate algorithmic manipulation and discovery." 
}, "venue": { "type": "string", "description": "The name of the paper\u2019s publication venue.", "example": "Annual Meeting of the Association for Computational Linguistics" }, "publicationVenue": { "type": "object", "description": "An object that contains the following information about the journal or conference in which this paper was published: id (the venue\u2019s unique ID), name (the venue\u2019s name), type (the type of venue), alternate_names (an array of alternate names for the venue), and url (the venue\u2019s website).", "example": { "id": "1e33b3be-b2ab-46e9-96e8-d4eb4bad6e44", "name": "Annual Meeting of the Association for Computational Linguistics", "type": "conference", "alternate_names": [ "Annu Meet Assoc Comput Linguistics", "Meeting of the Association for Computational Linguistics", "ACL", "Meet Assoc Comput Linguistics" ], "url": "https://www.aclweb.org/anthology/venues/acl/" } }, "year": { "type": "integer", "description": "The year the paper was published.", "example": 1997 }, "referenceCount": { "type": "integer", "description": "The total number of papers this paper references.", "example": 59 }, "citationCount": { "type": "integer", "description": "The total number of papers that reference this paper.", "example": 453 }, "influentialCitationCount": { "type": "integer", "description": "A subset of the citation count, where the cited publication has a significant impact on the citing publication. Determined by Semantic Scholar\u2019s algorithm: https://www.semanticscholar.org/faq#influential-citations.", "example": 90 }, "isOpenAccess": { "type": "boolean", "description": "Whether the paper is open access. 
More information here: https://www.openaccess.nl/en/what-is-open-access.", "example": true }, "openAccessPdf": { "type": "object", "description": "An object that contains the following parameters: url (a link to the paper\u2019s PDF), status (the type of open access https://en.wikipedia.org/wiki/Open_access#Colour_naming_system), the paper's license, and a legal disclaimer.", "example": { "url": "https://www.aclweb.org/anthology/2020.acl-main.447.pdf", "status": "HYBRID", "license": "CCBY", "disclaimer": "Notice: This snippet is extracted from the open access paper or abstract available at https://aclanthology.org/2020.acl-main.447, which is subject to the license by the author or copyright owner provided with this content. Please go to the source to verify the license and copyright information for your use." } }, "fieldsOfStudy": { "type": "array", "description": "A list of the paper\u2019s high-level academic categories from external sources. The possible fields are: Computer Science, Medicine, Chemistry, Biology, Materials Science, Physics, Geology, Psychology, Art, History, Geography, Sociology, Business, Political Science, Economics, Philosophy, Mathematics, Engineering, Environmental Science, Agricultural and Food Sciences, Education, Law, and Linguistics.", "example": [ "Computer Science" ], "items": { "type": "string" } }, "s2FieldsOfStudy": { "type": "array", "description": "An array of objects. Each object contains the following parameters: category (a field of study. The possible fields are the same as in fieldsOfStudy), and source (specifies whether the category was classified by Semantic Scholar or by an external source. 
More information on how Semantic Scholar classifies papers https://medium.com/ai2-blog/announcing-s2fos-an-open-source-academic-field-of-study-classifier-9d2f641949e5)", "example": [ { "category": "Computer Science", "source": "external" }, { "category": "Computer Science", "source": "s2-fos-model" }, { "category": "Mathematics", "source": "s2-fos-model" } ], "items": { "type": "object" } }, "publicationTypes": { "type": "array", "description": "The type of this publication.", "example": [ "Journal Article", "Review" ], "items": { "type": "string" } }, "publicationDate": { "type": "string", "description": "The date when this paper was published, in YYYY-MM-DD format.", "example": "2024-04-29" }, "journal": { "type": "object", "description": "An object that contains the following parameters, if available: name (the journal name), volume (the journal\u2019s volume number), and pages (the page number range)", "example": { "volume": "40", "pages": "116 - 135", "name": "IETE Technical Review" } }, "citationStyles": { "type": "object", "description": "The BibTex bibliographical citation of the paper.", "example": { "bibtex": "@['JournalArticle', 'Conference']{Ammar2018ConstructionOT,\n author = {Waleed Ammar and Dirk Groeneveld and Chandra Bhagavatula and Iz Beltagy and Miles Crawford and Doug Downey and Jason Dunkelberger and Ahmed Elgohary and Sergey Feldman and Vu A. Ha and Rodney Michael Kinney and Sebastian Kohlmeier and Kyle Lo and Tyler C. Murray and Hsu-Han Ooi and Matthew E. Peters and Joanna L. 
Power and Sam Skjonsberg and Lucy Lu Wang and Christopher Wilhelm and Zheng Yuan and Madeleine van Zuylen and Oren Etzioni},\n booktitle = {NAACL},\n pages = {84-91},\n title = {Construction of the Literature Graph in Semantic Scholar},\n year = {2018}\n}\n" } }, "authors": { "type": "array", "items": { "title": "Author Info", "allOf": [ { "$ref": "#/definitions/AuthorInPaper" } ] } }, "citations": { "type": "array", "title": "Paper Info", "items": { "$ref": "#/definitions/BasePaper" } }, "references": { "type": "array", "items": { "title": "Paper Info", "allOf": [ { "$ref": "#/definitions/BasePaper" } ] } }, "embedding": { "$ref": "#/definitions/Embedding" }, "tldr": { "$ref": "#/definitions/Tldr" }, "textAvailability": { "type": "string", "description": "fulltext, abstract, or none, based on what we have available for this paper" } }, "type": "object" }, "PaperAutocomplete": { "properties": { "matches": { "type": "array", "items": { "$ref": "#/definitions/Autocomplete Paper" } } }, "type": "object" }, "Autocomplete Paper": { "properties": { "id": { "type": "string", "description": "The paper's primary unique identifier.", "example": "649def34f8be52c8b66281af98ae884c09aef38b" }, "title": { "type": "string", "description": "Title of the paper.", "example": "SciBERT: A Pretrained Language Model for Scientific Text" }, "authorsYear": { "type": "string", "description": "Summary of the authors of the paper and year of publication.", "example": "Beltagy et al., 2019" } }, "type": "object" }, "AuthorWithPapers": { "properties": { "authorId": { "type": "string", "title": "This field will be provided if no fields are specified", "description": "Semantic Scholar\u2019s unique ID for the author.", "example": "1741101" }, "externalIds": { "type": "object", "description": "An object that contains the ORCID/DBLP IDs for the author, if known.", "example": { "DBLP": [ 123 ] } }, "url": { "type": "string", "description": "URL of the author on the Semantic Scholar website.", 
"example": "https://www.semanticscholar.org/author/1741101" }, "name": { "type": "string", "title": "This field will be provided if no fields are specified", "description": "Author\u2019s name.", "example": "Oren Etzioni" }, "affiliations": { "type": "array", "description": "Array of organizational affiliations for the author.", "example": [ "Allen Institute for AI" ], "items": { "type": "string" } }, "homepage": { "type": "string", "description": "The author\u2019s homepage.", "example": "https://allenai.org/" }, "paperCount": { "type": "integer", "description": "The author's total publications count.", "example": 10 }, "citationCount": { "type": "integer", "description": "The author's total citations count.", "example": 50 }, "hIndex": { "type": "integer", "description": "The author\u2019s h-index, which is a measure of the productivity and citation impact of the author\u2019s publications: https://www.semanticscholar.org/faq#h-index", "example": 5 }, "papers": { "type": "array", "items": { "title": "Paper Info", "allOf": [ { "$ref": "#/definitions/BasePaper" } ] } } }, "type": "object" }, "AuthorIdList": { "properties": { "ids": { "type": "array", "items": { "type": "string", "example": "1741101" } } }, "type": "object" }, "AuthorPaperBatch": { "properties": { "offset": { "type": "integer", "description": "Starting position for this batch." }, "next": { "type": "integer", "description": "Starting position of the next batch. Absent if no more data exists." 
}, "data": { "type": "array", "items": { "title": "contents of this batch", "allOf": [ { "$ref": "#/definitions/PaperWithLinks" } ] } } }, "type": "object" }, "PaperWithLinks": { "properties": { "paperId": { "type": "string", "description": "Semantic Scholar\u2019s primary unique identifier for a paper.", "example": "5c5751d45e298cea054f32b392c12c61027d2fe7" }, "corpusId": { "type": "integer", "description": "Semantic Scholar\u2019s secondary unique identifier for a paper.", "example": 215416146 }, "externalIds": { "type": "object", "description": "An object that contains the paper\u2019s unique identifiers in external sources. The external sources are limited to: ArXiv, MAG, ACL, PubMed, Medline, PubMedCentral, DBLP, and DOI.", "example": { "MAG": "3015453090", "DBLP": "conf/acl/LoWNKW20", "ACL": "2020.acl-main.447", "DOI": "10.18653/V1/2020.ACL-MAIN.447", "CorpusId": 215416146 } }, "url": { "type": "string", "description": "URL of the paper on the Semantic Scholar website.", "example": "https://www.semanticscholar.org/paper/5c5751d45e298cea054f32b392c12c61027d2fe7" }, "title": { "type": "string", "description": "Title of the paper.", "example": "Construction of the Literature Graph in Semantic Scholar" }, "abstract": { "type": "string", "description": "The paper's abstract. Note that due to legal reasons, this may be missing even if we display an abstract on the website.", "example": "We describe a deployed scalable system for organizing published scientific literature into a heterogeneous graph to facilitate algorithmic manipulation and discovery." 
}, "venue": { "type": "string", "description": "The name of the paper\u2019s publication venue.", "example": "Annual Meeting of the Association for Computational Linguistics" }, "publicationVenue": { "type": "object", "description": "An object that contains the following information about the journal or conference in which this paper was published: id (the venue\u2019s unique ID), name (the venue\u2019s name), type (the type of venue), alternate_names (an array of alternate names for the venue), and url (the venue\u2019s website).", "example": { "id": "1e33b3be-b2ab-46e9-96e8-d4eb4bad6e44", "name": "Annual Meeting of the Association for Computational Linguistics", "type": "conference", "alternate_names": [ "Annu Meet Assoc Comput Linguistics", "Meeting of the Association for Computational Linguistics", "ACL", "Meet Assoc Comput Linguistics" ], "url": "https://www.aclweb.org/anthology/venues/acl/" } }, "year": { "type": "integer", "description": "The year the paper was published.", "example": 1997 }, "referenceCount": { "type": "integer", "description": "The total number of papers this paper references.", "example": 59 }, "citationCount": { "type": "integer", "description": "The total number of papers that reference this paper.", "example": 453 }, "influentialCitationCount": { "type": "integer", "description": "A subset of the citation count, where the cited publication has a significant impact on the citing publication. Determined by Semantic Scholar\u2019s algorithm: https://www.semanticscholar.org/faq#influential-citations.", "example": 90 }, "isOpenAccess": { "type": "boolean", "description": "Whether the paper is open access. 
More information here: https://www.openaccess.nl/en/what-is-open-access.", "example": true }, "openAccessPdf": { "type": "object", "description": "An object that contains the following parameters: url (a link to the paper\u2019s PDF), status (the type of open access https://en.wikipedia.org/wiki/Open_access#Colour_naming_system), the paper's license, and a legal disclaimer.", "example": { "url": "https://www.aclweb.org/anthology/2020.acl-main.447.pdf", "status": "HYBRID", "license": "CCBY", "disclaimer": "Notice: This snippet is extracted from the open access paper or abstract available at https://aclanthology.org/2020.acl-main.447, which is subject to the license by the author or copyright owner provided with this content. Please go to the source to verify the license and copyright information for your use." } }, "fieldsOfStudy": { "type": "array", "description": "A list of the paper\u2019s high-level academic categories from external sources. The possible fields are: Computer Science, Medicine, Chemistry, Biology, Materials Science, Physics, Geology, Psychology, Art, History, Geography, Sociology, Business, Political Science, Economics, Philosophy, Mathematics, Engineering, Environmental Science, Agricultural and Food Sciences, Education, Law, and Linguistics.", "example": [ "Computer Science" ], "items": { "type": "string" } }, "s2FieldsOfStudy": { "type": "array", "description": "An array of objects. Each object contains the following parameters: category (a field of study. The possible fields are the same as in fieldsOfStudy), and source (specifies whether the category was classified by Semantic Scholar or by an external source. 
More information on how Semantic Scholar classifies papers https://medium.com/ai2-blog/announcing-s2fos-an-open-source-academic-field-of-study-classifier-9d2f641949e5)", "example": [ { "category": "Computer Science", "source": "external" }, { "category": "Computer Science", "source": "s2-fos-model" }, { "category": "Mathematics", "source": "s2-fos-model" } ], "items": { "type": "object" } }, "publicationTypes": { "type": "array", "description": "The type of this publication.", "example": [ "Journal Article", "Review" ], "items": { "type": "string" } }, "publicationDate": { "type": "string", "description": "The date when this paper was published, in YYYY-MM-DD format.", "example": "2024-04-29" }, "journal": { "type": "object", "description": "An object that contains the following parameters, if available: name (the journal name), volume (the journal\u2019s volume number), and pages (the page number range)", "example": { "volume": "40", "pages": "116 - 135", "name": "IETE Technical Review" } }, "citationStyles": { "type": "object", "description": "The BibTex bibliographical citation of the paper.", "example": { "bibtex": "@['JournalArticle', 'Conference']{Ammar2018ConstructionOT,\n author = {Waleed Ammar and Dirk Groeneveld and Chandra Bhagavatula and Iz Beltagy and Miles Crawford and Doug Downey and Jason Dunkelberger and Ahmed Elgohary and Sergey Feldman and Vu A. Ha and Rodney Michael Kinney and Sebastian Kohlmeier and Kyle Lo and Tyler C. Murray and Hsu-Han Ooi and Matthew E. Peters and Joanna L. 
Power and Sam Skjonsberg and Lucy Lu Wang and Christopher Wilhelm and Zheng Yuan and Madeleine van Zuylen and Oren Etzioni},\n booktitle = {NAACL},\n pages = {84-91},\n title = {Construction of the Literature Graph in Semantic Scholar},\n year = {2018}\n}\n" } }, "authors": { "type": "array", "title": "Author Info", "description": "Details about the paper's authors", "items": { "$ref": "#/definitions/AuthorInfo" } }, "citations": { "type": "array", "title": "Paper Info", "items": { "$ref": "#/definitions/PaperInfo" } }, "references": { "type": "array", "items": { "title": "Paper Info", "allOf": [ { "$ref": "#/definitions/PaperInfo" } ] } } }, "type": "object" }, "PaperInfo": { "properties": { "paperId": { "type": "string", "description": "Semantic Scholar\u2019s primary unique identifier for a paper.", "example": "5c5751d45e298cea054f32b392c12c61027d2fe7" }, "corpusId": { "type": "integer", "description": "Semantic Scholar\u2019s secondary unique identifier for a paper.", "example": 215416146 }, "url": { "type": "string", "description": "URL of the paper on the Semantic Scholar website.", "example": "https://www.semanticscholar.org/paper/5c5751d45e298cea054f32b392c12c61027d2fe7" }, "title": { "type": "string", "description": "Title of the paper.", "example": "Construction of the Literature Graph in Semantic Scholar" }, "venue": { "type": "string", "description": "The name of the paper\u2019s publication venue.", "example": "Annual Meeting of the Association for Computational Linguistics" }, "publicationVenue": { "type": "object", "description": "An object that contains the following information about the journal or conference in which this paper was published: id (the venue\u2019s unique ID), name (the venue\u2019s name), type (the type of venue), alternate_names (an array of alternate names for the venue), and url (the venue\u2019s website).", "example": { "id": "1e33b3be-b2ab-46e9-96e8-d4eb4bad6e44", "name": "Annual Meeting of the Association for Computational 
Linguistics", "type": "conference", "alternate_names": [ "Annu Meet Assoc Comput Linguistics", "Meeting of the Association for Computational Linguistics", "ACL", "Meet Assoc Comput Linguistics" ], "url": "https://www.aclweb.org/anthology/venues/acl/" } }, "year": { "type": "integer", "description": "The year the paper was published.", "example": 1997 }, "authors": { "type": "array", "title": "Author Info", "description": "Details about the paper's authors", "items": { "$ref": "#/definitions/AuthorInfo" } } }, "type": "object" }, "AuthorSearchBatch": { "properties": { "total": { "type": "integer", "description": "Approximate number of matching search results.\n\nBecause of the subtleties of finding partial phrase matches in different parts of the document, be cautious about interpreting the total field as a count of documents containing any particular word in the query.", "example": 15117 }, "offset": { "type": "integer", "description": "Starting position for this batch." }, "next": { "type": "integer", "description": "Starting position of the next batch. Absent if no more data exists." }, "data": { "type": "array", "items": { "title": "contents of this batch", "allOf": [ { "$ref": "#/definitions/AuthorWithPapers" } ] } } }, "type": "object" }, "SnippetMatch": { "properties": { "data": { "type": "array", "items": { "$ref": "#/definitions/Snippet Match" } }, "retrievalVersion": { "type": "string", "description": "A rough representation of the retrieval approach we've used to get the results. We'll usually bump this if we change something about how we get results. Note that the same retrievalVersion value doesn't guarantee that you'll get the same results for the same query at different times, and a different retrievalVersion value doesn't always mean you'll get different results." 
} }, "type": "object" }, "Snippet Match": { "properties": { "snippet": { "$ref": "#/definitions/snippet" }, "score": { "type": "number", "example": 0.561970777028496 }, "paper": { "$ref": "#/definitions/paper" } }, "type": "object" }, "snippet": { "properties": { "text": { "type": "string", "description": "The direct quote or snippet from the paper relevant to the query.", "example": "In this paper, we discuss the construction of a graph, providing a symbolic representation of the scientific literature. We describe deployed models for identifying authors, references and entities in the paper text, and provide experimental results to evaluate the performance of each model. \n\nThree research directions follow from this work and other similar projects, e.g., Hahn-Powell et al. (2017); Wu et al. (2014): i) improving quality and enriching content of the literature graph (e.g., ontology matching and knowledge base population). ii) aggregating domain-specific extractions across many papers to enable a better understanding of the literature as a whole (e.g., identifying demographic biases in clinical trial participants and summarizing empirical results on important tasks). iii) exploring the literature via natural language interfaces. \n\nIn order to help future research efforts, we make the following resources publicly available: metadata for over 20 million papers,10 meaningful citations dataset,11 models for figure and table extraction,12 models for predicting citations in a paper draft 13 and models for extracting paper metadata,14 among other resources." }, "snippetKind": { "type": "string", "description": "Where the snippet is located, options are: title, abstract, or body. 
", "example": "body" }, "section": { "type": "string", "description": "Only applies to snippets from the body, refers to the section of the paper where the snippet is located.", "example": "Conclusion and Future Work" }, "snippetOffset": { "type": "object", "description": "The location of the snippet within the paper.", "example": { "start": 24506, "end": 25694 } }, "annotations": { "$ref": "#/definitions/annotations" } }, "type": "object" }, "annotations": { "properties": { "sentences": { "type": "array", "items": { "$ref": "#/definitions/sentence" } }, "refMentions": { "type": "array", "items": { "$ref": "#/definitions/refMention" } } }, "type": "object" }, "sentence": { "properties": { "start": { "type": "integer", "example": 0 }, "end": { "type": "integer", "example": 120 } }, "type": "object" }, "refMention": { "properties": { "start": { "type": "integer", "example": 377 }, "end": { "type": "integer", "example": 402 }, "matchedPaperCorpusId": { "type": "string", "example": "7377848" } }, "type": "object" }, "paper": { "properties": { "corpusId": { "type": "string", "description": "Semantic Scholar\u2019s secondary unique identifier for a paper.", "example": "19170988" }, "title": { "type": "string", "description": "Title of the paper.", "example": "Construction of the Literature Graph in Semantic Scholar" }, "authors": { "type": "array", "items": { "type": "string", "description": "Authors of the paper.", "example": [ "Bridger Waleed Ammar", "Dirk Groeneveld", "Chandra Bhagavatula", "Iz Beltagy", "Miles Crawford", "Doug Downey", "Jason Dunkelberger", "Ahmed Elgohary", "Sergey Feldman", "Vu A. Ha", "Rodney Michael Kinney", "Sebastian Kohlmeier", "Kyle Lo", "Tyler C. Murray", "Hsu-Han Ooi", "Matthew E. Peters", "Joanna L. 
Power", "Sam Skjonsberg", "Lucy Lu Wang", "Christopher Wilhelm", "Zheng Yuan", "Madeleine van Zuylen", "Oren Etzioni" ] } }, "openAccessInfo": { "$ref": "#/definitions/openAccessInfo" } }, "type": "object" }, "openAccessInfo": { "properties": { "license": { "type": "string", "description": "The license attached to the paper.", "example": "CCBY" }, "status": { "type": "string", "description": "Paper's status (the type of open access https://en.wikipedia.org/wiki/Open_access#Colour_naming_system)", "example": "HYBRID" }, "disclaimer": { "type": "string", "description": "A disclaimer about the open access use of this paper.", "example": "Notice: This snippet is extracted from the open access paper or abstract available at https://arxiv.org/abs/1805.02262, which is subject to the license by the author or copyright owner provided with this content. Please go to the source to verify the license and copyright information for your use." } }, "type": "object" } } }