{
  "title": "CrowdTruth ground truth for medical relation extraction",
  "description": "A ground truth corpus for medical relation extraction, acquired with crowdsourcing and processed with CrowdTruth metrics.",
  "fields": [{
		"name": "SID",
		"description": "An unique identifier of the data entry.",
		"constraints": {
			"required": true,
			"unique": true
        }
    }, {
		"name": "relation",
		"description": "The medical relation for which the ground truth is collected.",
		"constraints": {
			"required": true
        }
    }, {
		"name": "sentence_relation_score",
		"description": "The sentence relation score of the medical relation; using cosine similarity over the aggregated crowd data, it computes the likelihood that the relation is expressed between the two terms in the sentence.",
		"constraints": {
			"required": true,
			"type": "http://www.w3.org/2001/XMLSchema#double"	
        }
    }, {
		"name": "crowd",
		"description": "The score used to train the relation extraction classifier by Chang et al. with crowd data; it is the sentence-relation score, with a threshold to select positive and negative examples equal to 0.5, and rescaled in [0.5, 1] for positives, and [-1, -0.5] for negatives.",
		"constraints": {
			"required": true,
			"type": "http://www.w3.org/2001/XMLSchema#double"	
        }
    }, {
		"name": "baseline",
		"description": "Discrete (positive or negative) labels are given for each data entry by the distant supervision method, based on whether the relation is expressed between the 2 terms in the sentence",
		"constraints": {
			"required": true,
			"pattern": "(-1|1|)"	
        }
    }, {
		"name": "expert",
		"description": "Discrete labels based on an expert’s judgment as to whether the distant supervision label is correct.",
		"constraints": {
			"pattern": "(-1|1|)"
        }
    }, {
		"name": "test_partition",
		"description": "Manual evaluation scores over the sentences where crowd and expert disagreed, used for evaluating the classifier; the sentence-relation score threshold was set at 0.7 for maximum agreement; sentences scored with 0 were determined to be unclear and were removed from testing.",
		"constraints": {
			"pattern": "(-1|0|1|)"	
        }
    }, {
		"name": "term1",
		"description": "The first medical term, after correction with crowdsourcing; together with Term2, it expresses the relation: 'term1 relation term2'.",
		"constraints": {
			"required": true
        }
    }, {
		"name": "b1",
		"description": "The beginning position of Term1 in the sentence, measured in number of characters.",
		"constraints": {
			"required": true,
			"type": "http://www.w3.org/2001/XMLSchema#int"	
        }
    }, {
		"name": "e1",
		"description": "The ending position of Term1 in the sentence, measured in number of characters.",
		"datatype": "number",
		"constraints": {
			"required": true,
			"type": "http://www.w3.org/2001/XMLSchema#int"	
        }
    }, {
		"name": "term2",
		"title": "Term2",
		"description": "The second medical term, after correction with crowdsourcing; together with Term1, it expresses the relation: 'term1 relation term2'.",
		"constraints": {
			"required": true
        }
    }, {
		"name": "b2",
		"description": "The beginning position of Term2 in the sentence, measured in number of characters.",
		"datatype": "number",
		"constraints": {
			"required": true,
			"type": "http://www.w3.org/2001/XMLSchema#int"	
        }
    }, {
		"name": "e2",
		"description": "The ending position of Term2 in the sentence, measured in number of characters.",
		"datatype": "number",
		"constraints": {
			"required": true,
			"type": "http://www.w3.org/2001/XMLSchema#int"	
        }
    }, {
		"name": "sentence",
		"description": "The medical sentence in which the relation is expressed.",
		"constraints": {
			"required": true
        }
    }, {
		"name": "term1_UMLS",
		"description": "The original UMLS version of Term1, used for distant supervision, before correction with crowdsourcing.",
		"constraints": {
			"required": true
        }
    }, {
		"name": "term2_UMLS",
		"description": "The original UMLS version of Term2, used for distant supervision, before correction with crowdsourcing.",
		"constraints": {
			"required": true
        }
    }, {
		"name": "UMLS_seed_relation",
		"description": "The UMLS relation used as a seed in distant supervision to find the given entry.",
		"constraints": {
			"required": true
        }
    }
   ]
}