def parse_token(cols):
	"""Build a token dictionary from the column values of one token line.

	Values in ``cols`` are paired positionally with the fixed column names
	below. Pairing stops at the shorter of the two sequences, so a short
	``cols`` yields a dictionary with fewer keys and extra columns are ignored.

	Conversions applied per column:
	  * ``form``, ``lemma``, ``upos``: split on "," into a list of strings;
	  * ``required``: parsed as an integer and then turned into a bool
	    (raises ``ValueError`` if the text is not an integer);
	  * every other column: stored unchanged.

	:param cols: sequence of column strings for a single token.
	:return: dict mapping column name to its (possibly converted) value.
	"""
	field_names = (
		"id", "form", "lemma", "upos", "feats", "head", "deprel",
		"required", "without", "sem_feats", "sem_roles", "adjacency",
		"identity",
	)
	# Columns whose cell holds comma-separated alternatives.
	list_valued = ("form", "lemma", "upos")

	token = {}
	for name, raw in zip(field_names, cols):
		if name in list_valued:
			value = raw.split(",")
		elif name == "required":
			value = bool(int(raw))
		else:
			value = raw
		token[name] = value

	return token


# TODO: handle more than one construction
def parse(construction_str, mapping=None):
	"""Parse the textual description of one construction.

	The input is processed line by line. Each line is stripped of
	surrounding whitespace; empty lines are skipped. Lines starting with
	"#" are metadata: the text after the "#" is split on the FIRST "="
	only, so a value that itself contains "=" stays in one piece. All
	other lines are split on tab characters and handed to ``parse_token``.

	:param construction_str: full text of the construction.
	:param mapping: accepted for interface compatibility; not used by
		this function.
	:return: dict with two keys: ``"metadata"``, a list of
		``[key, value]`` lists (or a one-element list when the line has
		no "="), and ``"tokens"``, a list of the dictionaries returned
		by ``parse_token``.
	"""
	parsed = {
		"metadata": [],
		"tokens": [],
	}

	for raw_line in construction_str.split("\n"):
		line = raw_line.strip()
		if not line:
			continue

		if line.startswith("#"):
			# maxsplit=1: only the first "=" separates key from value.
			# Surrounding whitespace is left untouched on purpose.
			parsed["metadata"].append(line[1:].split("=", 1))
		else:
			parsed["tokens"].append(parse_token(line.split("\t")))

	return parsed


if __name__ == "__main__":
	import sys

	# Take the file to parse from the first command-line argument. Without
	# an argument, fall back to the original sample path so the script's
	# previous behaviour is preserved.
	default_path = "/Users/ludovica/Documents/projects/adoc/cxns_conllc/cxn_68.conllc"
	path = sys.argv[1] if len(sys.argv) > 1 else default_path

	# Explicit encoding so the result does not depend on the platform default.
	with open(path, encoding="utf-8") as fin:
		res = parse(fin.read())

	# The file is already closed here; only the parsed result is printed.
	print(res["metadata"])
	for tok in res["tokens"]:
		print(tok)