{
    "Name": "PARSEME",
    "Volume": 1847.0,
    "Unit": "sentences",
    "License": "CC BY 4.0",
    "Link": "https://gitlab.com/parseme/parseme_corpus_ar",
    "HF_Link": "",
    "Year": 2022,
    "Domain": [
        "news articles"
    ],
    "Form": "text",
    "Collection_Style": [
        "human annotation"
    ],
    "Description": "Annotated Arabic VMWE corpus.",
    "Ethical_Risks": "Low",
    "Provider": [
        "PARSEME"
    ],
    "Derived_From": [
        "Prague Arabic Dependency Treebank"
    ],
    "Paper_Title": "Annotation d\u2019expressions polylexicales verbales en arabe : validation d\u2019une proc\u00e9dure d\u2019annotation multilingue",
    "Paper_Link": "https://aclanthology.org/2022.jeptalnrecital-taln.27.pdf",
    "Tokenized": true,
    "Host": "GitLab",
    "Access": "Free",
    "Cost": "",
    "Test_Split": false,
    "Tasks": [
        "other"
    ],
    "Venue_Title": "TALN",
    "Venue_Type": "conference",
    "Venue_Name": "TALN",
    "Authors": [
        "Najet Hadj Mohamed",
        "Cherifa Ben Khelil",
        "Agata Savary",
        "Iskandar Keskes",
        "Jean-Yves Antoine",
        "Lamia Belguith Hadrich"
    ],
    "Affiliations": [
        "LIFAT",
        "MIRACL",
        "LISN"
    ],
    "Abstract": "This paper describes our efforts to extend the PARSEME framework to Modern Standard Arabic. The applicability of the PARSEME guidelines was tested by measuring the inter-annotator agreement in the early annotation stage. A subset of 1062 sentences from the Prague Arabic Dependency Treebank PADT was selected and annotated by two Arabic native speakers independently. Following their annotations, a new Arabic corpus with over 1250 annotated VMWEs has been built.",
    "Subsets": [],
    "Dialect": "Modern Standard Arabic",
    "Language": "ar",
    "Script": "Arab",
    "Added_By": "qwen/qwen3.6-35b-a3b"
}