{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://gitlab.wikimedia.org/diegodlh/w2c-core/-/blob/main/templates.schema.json",
  "title": "Domain translation templates configuration for Web2Cit",
  "description": "Each translation template defines translation procedures to extract relevant metadata from a specific template webpage (see https://meta.wikimedia.org/wiki/Web2Cit/Docs/Templates)",
  "type": "array",
  "format": "tabs-top",
  "items": {
    "title": "Translation template",
    "type": "object",
    "properties": {
      "path": {
        "title": "Path",
        "description": "Path of the webpage used as translation template",
        "options": {
          "infoText": "Do not include the hostname; just the path beginning with \"/\". You may also include query (?) and fragment (#) components. Duplicate translation templates for the same path will be ignored.",
          "patternmessage": "Path must start with \"/\""
        },
        "type": "string",
        "default": "/",
        "pattern": "^\/.*"
      },
      "label": {
        "title": "Label",
        "description": "Fancy name for this translation template",
        "type": "string"
      },
      "fields": {
        "title": "Fields",
        "description": "Each template field defines translation procedures to extract metadata belonging to a specific translation field (see https://meta.wikimedia.org/wiki/Web2Cit/Docs/Fields)",
        "options": {
          "infoText": "Mandatory fields \"itemType\" and \"title\" must be included, or template will be ignored. Duplicate template fields with the same field name will be ignored.",
          "disable_array_reorder": true
        },
        "type": "array",
        "format": "tabs-top",
        "items": {
          "title": "Template field",
          "type": "object",
          "format": "grid-strict",
          "properties": {
            "fieldname": {
              "title": "Field name",
              "type": "string",
              "options": { "hidden": true }
            },
            "required": {
              "title": "Required",
              "description": "Whether the template field is required or not",
              "options": {
                "infoText": "Given a target webpage, the output of all required fields must be valid for the template to be applicable. Mandatory fields \"itemType\" and \"title\" are always required.",
                "grid_columns": 2
              },
              "type": "boolean",
              "default": true
            },
            "procedures": {
              "$ref": "#/definitions/procedures",
              "options": { "grid_columns": 10 }
            }
          },
          "required": [ "fieldname", "required", "procedures" ],
          "oneOf": [
            {
              "title": "Item type field (mandatory)",
              "description": "Type of the cited resource | Valid output: one of the Citoid/Zotero supported types (see https://meta.wikimedia.org/wiki/Web2Cit/Docs/Fields)",
              "properties": {
                "fieldname": { "enum": ["itemType"] },
                "required": { "readOnly": true },
                "procedures": {
                  "options": {
                    "infoText": "Fallback \"Item type\" procedure: select \"itemType\" field from Citoid response"
                  },
                  "items": {
                    "additionalProperties": true,
                    "default": {
                      "selections": [
                        { "type": "citoid", "config": "itemType" }
                      ],
                      "transformations": []
                    }
                  }
                }
              },
              "additionalProperties": true
            },
            {
              "title": "Title field (mandatory)",
              "description": "Title of the cited resource | Valid output: a single non-empty string",
              "properties": {
                "fieldname": { "enum": ["title"] },
                "required": { "readOnly": true },
                "procedures": {
                  "options": {
                    "infoText": "Fallback \"Title\" procedure: select \"title\" field from Citoid response"
                  },
                  "items": {
                    "additionalProperties": true,
                    "default": {
                      "selections": [
                        { "type": "citoid", "config": "title" }
                      ],
                      "transformations": []
                    }
                  }
                }
              },
              "additionalProperties": true
            },
            {
              "title": "Author last/full names field",
              "description": "Authors' last or full names | Valid output: one or more non-empty strings",
              "properties": {
                "fieldname": { "enum": ["authorLast"] },
                "procedures": {
                  "options": {
                    "infoText": "Fallback \"Author last/full names\" procedure: select \"authorLast\" field from Citoid response"
                  },
                  "items": {
                    "additionalProperties": true,
                    "default": {
                      "selections": [
                        { "type": "citoid", "config": "authorLast" }
                      ],
                      "transformations": []
                    }
                  }
                }
              },
              "additionalProperties": true
            },
            {
              "title": "Author first names field",
              "description": "Authors' first names | Valid output: one or more empty or non-empty strings",
              "properties": {
                "fieldname": { "enum": ["authorFirst"] },
                "procedures": {
                  "options": {
                    "infoText": "Fallback \"Author first names\" procedure: select \"authorFirst\" field from Citoid response"
                  },
                  "items": {
                    "additionalProperties": true,
                    "default": {
                      "selections": [
                        { "type": "citoid", "config": "authorFirst" }
                      ],
                      "transformations": []
                    }
                  }
                }
              },
              "additionalProperties": true
            },
            {
              "title": "Date field",
              "description": "Publishing date | Valid output: a single yyyy-mm-dd, yyyy-mm or yyyy value",
              "properties": {
                "fieldname": { "enum": ["date"] },
                "procedures": {
                  "options": {
                    "infoText": "Fallback \"Date\" procedure: select \"date\" field from Citoid response"
                  },
                  "items": {
                    "additionalProperties": true,
                    "default": {
                      "selections": [
                        { "type": "citoid", "config": "date" }
                      ],
                      "transformations": []
                    }
                  }
                }
              },
              "additionalProperties": true
            },
            {
              "title": "Published in field",
              "description": "Work containing the cited resource | Valid output: a single non-empty string",
              "properties": {
                "fieldname": { "enum": ["publishedIn"] },
                "procedures": {
                  "options": {
                    "infoText": "Fallback \"Published in\" procedure: select any of \"publicationTitle\", \"code\" or \"reporter\" fields from Citoid response"
                  },
                  "items": {
                    "additionalProperties": true,
                    "default": {
                      "selections": [
                        { "type": "citoid", "config": "publicationTitle" },
                        { "type": "citoid", "config": "code" },
                        { "type": "citoid", "config": "reporter" }
                      ],
                      "transformations": []
                    }
                  }
                }
              },
              "additionalProperties": true
            },
            {
              "title": "Published by field",
              "description": "Publisher of the cited resource | Valid output: a single non-empty string",
              "properties": {
                "fieldname": { "enum": ["publishedBy"] },
                "procedures": {
                  "options": {
                    "infoText": "Fallback \"Published by\" procedure: select \"publisher\" field from Citoid response"
                  },
                  "items": {
                    "additionalProperties": true,
                    "default": {
                      "selections": [
                        { "type": "citoid", "config": "publisher" }
                      ],
                      "transformations": []
                    }
                  }
                }
              },
              "additionalProperties": true
            },
            {
              "title": "Language field",
              "description": "Publishing language | Valid output: a single non-empty string; preferably a xx or xx-xx* language code (see https://meta.wikimedia.org/wiki/Web2Cit/Docs/Fields)",
              "properties": {
                "fieldname": { "enum": ["language"] },
                "procedures": {
                  "options": {
                    "infoText": "Fallback \"Language\" procedure: select \"language\" field from Citoid response"
                  },
                  "items": {
                    "additionalProperties": true,
                    "default": {
                      "selections": [
                        { "type": "citoid", "config": "language" }
                      ],
                      "transformations": []
                    }
                  }
                }
              },
              "additionalProperties": true
            },
            {
              "title": "Control field",
              "description": "Template applicability control | Valid output: a single non-empty string",
              "properties": {
                "fieldname": { "enum": ["control"] }
              },
              "additionalProperties": true
            }
          ]
        }
      }
    },
    "required": [ "path", "fields" ]
  },
  "definitions": {
    "procedures": {
      "title": "Procedures",
      "description": "Each translation procedure defines a series of selection and transformation steps to extract and manipulate relevant metadata",
      "type": "array",
      "format": "tabs-top",
      "items": { "$ref": "#/definitions/procedure" }
    },
    "procedure": {
      "title": "Translation procedure",
      "type": "object",
      "format": "categories",
      "properties": {
        "selections": {
          "title": "Selection",
          "description": "Selection steps select and extract individual elements from the target webpage (see https://meta.wikimedia.org/wiki/Web2Cit/Docs/Templates#Selection_steps)",
          "options": {
            "infoText": "The combined output of all selection steps is given as input to the first transformation step below."
          },
          "type": "array",
          "format": "tabs",
          "items": {
            "title": "Step",
            "type": "object",
            "properties": {
              "type": {
                "title": "Type",
                "type": "string",
                "options": { "hidden": true }
              },
              "config": {
                "title": "Configuration",
                "type": "string"
              }
            },
            "required": [ "type", "config" ],
            "anyOf": [
              {
                "title": "Citoid selection",
                "description": "Selects a field from the Citoid response for the target webpage",
                "properties": {
                  "type": { "enum": ["citoid"] },
                  "config": {
                    "description": "Any valid Citoid/Zotero base field name; use Wikimedia REST API (\"citation\" endpoint, \"mediawiki-basefields\" format) to check Citoid response for the target webpage: https://en.wikipedia.org/api/rest_v1/#/Citation/getCitation",
                    "options": {
                      "infoText": "Creator fields (e.g., \"author\") are split into creatorFirst and creatorLast fields."
                    },
                    "enum": [
                      "abstractNote",
                      "accessDate",
                      "applicationNumber",
                      "archive",
                      "archiveLocation",
                      "artworkSize",
                      "assignee",
                      "attorneyAgentFirst",
                      "attorneyAgentLast",
                      "authorFirst",
                      "authorLast",
                      "bookAuthorFirst",
                      "bookAuthorLast",
                      "callNumber",
                      "castMemberFirst",
                      "castMemberLast",
                      "code",
                      "codeNumber",
                      "commenterFirst",
                      "commenterLast",
                      "committee",
                      "composerFirst",
                      "composerLast",
                      "conferenceName",
                      "contributorFirst",
                      "contributorLast",
                      "cosponsorFirst",
                      "cosponsorLast",
                      "counselFirst",
                      "counselLast",
                      "country",
                      "court",
                      "date",
                      "DOI",
                      "edition",
                      "editorFirst",
                      "editorLast",
                      "extra",
                      "filingDate",
                      "guestFirst",
                      "guestLast",
                      "history",
                      "interviewerFirst",
                      "interviewerLast",
                      "isbn",
                      "issn",
                      "issue",
                      "issuingAuthority",
                      "itemType",
                      "journalAbbreviation",
                      "language",
                      "legalStatus",
                      "legislativeBody",
                      "libraryCatalog",
                      "medium",
                      "meetingName",
                      "number",
                      "numberOfVolumes",
                      "numPages",
                      "oclc",
                      "pages",
                      "place",
                      "PMCID",
                      "PMID",
                      "priorityNumbers",
                      "producerFirst",
                      "producerLast",
                      "programmingLanguage",
                      "publicationTitle",
                      "publisher",
                      "recipientFirst",
                      "recipientLast",
                      "references",
                      "reporter",
                      "reviewedAuthorFirst",
                      "reviewedAuthorLast",
                      "rights",
                      "runningTime",
                      "scale",
                      "scriptwriterFirst",
                      "scriptwriterLast",
                      "section",
                      "series",
                      "seriesEditorFirst",
                      "seriesEditorLast",
                      "seriesNumber",
                      "seriesText",
                      "seriesTitle",
                      "session",
                      "shortTitle",
                      "system",
                      "tags",
                      "title",
                      "translatorFirst",
                      "translatorLast",
                      "type",
                      "url",
                      "versionNumber",
                      "volume",
                      "wordsByFirst",
                      "wordsByLast"
                    ]
                  }
                }
              },
              {
                "title": "XPath selection",
                "description": "Selects a node from the target webpage's HTML using XPath",
                "properties": {
                  "type": { "enum": ["xpath"] },
                  "config": {
                    "description": "Any valid XPath v1.0 expression",
                    "options": {
                      "infoText": "Your web browser's inspector (shown with F12 in some browsers) may help you get an XPath expression for an HTML node."
                    }
                  }
                }
              },
              {
                "title": "Fixed selection (Item type field)",
                "description": "Always returns the same predefined Citoid/Zotero item type",
                "properties": {
                  "type": { "enum": ["fixed"] },
                  "config": {
                    "description": "The predefined Citoid/Zotero item type to be returned.",
                    "enum": [
                      "artwork",
                      "attachment",
                      "audioRecording",
                      "bill",
                      "blogPost",
                      "book",
                      "bookSection",
                      "case",
                      "computerProgram",
                      "conferencePaper",
                      "dictionaryEntry",
                      "document",
                      "email",
                      "encyclopediaArticle",
                      "film",
                      "forumPost",
                      "hearing",
                      "instantMessage",
                      "interview",
                      "journalArticle",
                      "letter",
                      "magazineArticle",
                      "manuscript",
                      "map",
                      "newspaperArticle",
                      "note",
                      "patent",
                      "podcast",
                      "presentation",
                      "radioBroadcast",
                      "report",
                      "statute",
                      "thesis",
                      "tvBroadcast",
                      "videoRecording",
                      "webpage"
                    ]
                  }
                }
              },
              {
                "title": "Fixed selection",
                "description": "Always returns the same predefined value",
                "type": "object",
                "properties": {
                  "type": {
                    "title": "Type",
                    "type": "string",
                    "enum": ["fixed"],
                    "options": { "hidden": true }
                  },
                  "config": {
                    "title": "Configuration",
                    "description": "The predefined value to be returned.",
                    "type": "string"
                  }
                },
                "required": [ "type", "config" ]
              },
              {
                "title": "JSON-LD selection",
                "description": "Selects elements from JSON-LD objects present on the target webpage.",
                "properties": {
                  "type": { "enum": ["json-ld"] },
                  "config": {
                    "description": "Any valid JMESPath (https://jmespath.org/) expression",
                    "options": {
                      "infoText": "Expressions are evaluated against an array including all JSON-LD objects found on the target webpage."
                    }
                  }
                }
              }
            ]
          }
        },
        "transformations": {
          "title": "Transformation",
          "description": "Transformation steps transform selected elements (if needed) to return the expected output (see https://meta.wikimedia.org/wiki/Web2Cit/Docs/Templates#Transformation_steps)",
          "options": {
            "infoText": "Transformation steps are applied one after the other, and the output of the last transformation step is the procedure's output."
          },
          "type": "array",
          "format": "tabs",
          "items": {
            "title": "Step",
            "type": "object",
            "properties": {
              "type": {
                "title": "Type",
                "type": "string",
                "options": { "hidden": true }
              },
              "config": {
                "title": "Configuration",
                "type": "string"
              },
              "itemwise": {
                "title": "Item-wise",
                "description": "Whether transformation should be applied to each item of the input independently (true) or to the entire input as a whole (false).",
                "type": "boolean"
              }
            },
            "required": [ "type", "config", "itemwise" ],
            "oneOf": [
              {
                "title": "Join transformation",
                "description": "Joins two or more items in a list into one, using the separator specified",
                "properties": {
                  "type": { "enum": ["join"] },
                  "config": {
                    "description": "The separator to use."
                  },
                  "itemwise": {
                    "options": {
                      "infoText": "default: false (join transformation)"
                    },
                    "default": false
                  }
                }
              },
              {
                "title": "Split transformation",
                "description": "Splits a string at the separator specified into two or more substrings",
                "properties": {
                  "type": { "enum": ["split"] },
                  "config": {
                    "description": "The separator to use."
                  },
                  "itemwise": {
                    "options": {
                      "infoText": "default: true (split transformation)"
                    },
                    "default": true
                  }
                }
              },
              {
                "title": "Date transformation",
                "description": "Uses the Sugar.js library to try to parse natural language dates into the YYYY-MM-DD format",
                "properties": {
                  "type": { "enum": ["date"] },
                  "config": {
                    "description": "Any of the currently supported locales",
                    "enum": [
                      "ca",
                      "da",
                      "de",
                      "en",
                      "es",
                      "fi",
                      "fr",
                      "it",
                      "ja",
                      "ko",
                      "nl",
                      "no",
                      "pl",
                      "pt",
                      "ru",
                      "sv",
                      "zh-CN",
                      "zh-TW"
                    ],
                    "options": {
                      "enum_titles": [
                        "Catalan (ca)",
                        "Danish (da)",
                        "German (de)",
                        "English (en)",
                        "Spanish (es)",
                        "Finnish (fi)",
                        "French (fr)",
                        "Italian (it)",
                        "Japanese (ja)",
                        "Korean (ko)",
                        "Dutch (nl)",
                        "Norwegian (no)",
                        "Polish (pl)",
                        "Portuguese (pt)",
                        "Russian (ru)",
                        "Swedish (sv)",
                        "Chinese (zh-CN)",
                        "Chinese (zh-TW)"
                      ]
                    }
                  },
                  "itemwise": {
                    "options": {
                      "infoText": "default: true (date transformation)"
                    },
                    "default": true
                  }
                }
              },
              {
                "title": "Range transformation",
                "description": "Returns one or more items or ranges of items in the order specified",
                "properties": {
                  "type": { "enum": ["range"] },
                  "config": {
                    "description": "One or more comma-separated ranges: \"start(:end)\", \"start:\" or \":end\".",
                    "options": {
                      "infoText": "Ranges use one-based numbering, meaning that the first item is item 1"
                    }
                  },
                  "itemwise": {
                    "options": {
                      "infoText": "default: false (range transformation)"
                    },
                    "default": false
                  }
                }
              },
              {
                "title": "Match transformation",
                "description": "Returns one or more substrings matching a target",
                "properties": {
                  "type": { "enum": ["match"] },
                  "config": {
                    "description": "The matching target, expressed as either plain string or /regular expression/ (see https://meta.wikimedia.org/wiki/Web2Cit/Docs/Templates#Match_transformation)."
                  },
                  "itemwise": {
                    "options": {
                      "infoText": "default: true (match transformation)"
                    },
                    "default": true
                  }
                }
              }
            ]
          }
        }
      },
      "required": [ "selections", "transformations" ]
    }
  }
}