{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# POO pour données CoNLL-U\n",
    "\n",
    "1. Concevoir un jeu de classes pour parser des données au format CoNLL-U.\n",
    "2. Parser les données."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class WordFormatException(Exception):\n",
    "    \"\"\"Raised when a word line does not have the expected format.\"\"\"\n",
    "\n",
    "    def __init__(self, message):\n",
    "        # Forward the message to Exception so that str(exc), exc.args and\n",
    "        # tracebacks show it; keep .message for callers that read it directly.\n",
    "        super().__init__(message)\n",
    "        self.message = message"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Word:\n",
    "    \"\"\"\n",
    "    A word in conll-u format\n",
    "    See http://universaldependencies.org/format.html\n",
    "\n",
    "    Built from one tab-separated word line with exactly 10 columns.\n",
    "    Every column is stored as the raw string read from the line, in\n",
    "    column order: nid, form, lemma, upostag, xpostag, feats, head,\n",
    "    deprel, deps, misc.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, line):\n",
    "        \"\"\"\n",
    "        Parse one word line.\n",
    "\n",
    "        Args:\n",
    "            line: tab-separated word line; a trailing line ending is ignored.\n",
    "\n",
    "        Raises:\n",
    "            WordFormatException: if the line does not have exactly 10 columns.\n",
    "        \"\"\"\n",
    "        # Drop the end-of-line marker so it does not leak into the last\n",
    "        # column when lines come straight from a file.\n",
    "        columns = line.rstrip('\\r\\n').split('\\t')\n",
    "        if len(columns) != 10:\n",
    "            raise WordFormatException(\"A word line must have 10 columns\")\n",
    "        (self.nid, self.form, self.lemma, self.upostag, self.xpostag,\n",
    "         self.feats, self.head, self.deprel, self.deps, self.misc) = columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample word line with all 10 columns (the last two, deps and misc, are empty: \"_\").\n",
    "word = Word(\"1\\tJe\\til\\tPRON\\t_\\tNumber=Sing|Person=1|PronType=Prs\\t2\\tnsubj\\t_\\t_\")\n",
    "word.lemma"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}