{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following is an example of how to use the library. It uses FreeLing as a tokenizer and TreeTagger as a POS-tagger, and then parses the same sentence with three parsers: the Stanford Shift-Reduce Parser, FreeLing and MaltParser. For simplicity, the locations of all the external tools are read from environment variables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-------- Stanford -------- \n",
      "\n",
      "\n",
      "D:\\Matias\\Proyecto-PLN\\tree-tagger-windows-3.2\\TreeTagger\\bin\\tag-spanish.bat c:\\users\\matias\\appdata\\local\\temp\\treetagger_input_xs0es_ c:\\users\\matias\\appdata\\local\\temp\\treetagger_output_xk8fbw\n",
      "(ROOT\n",
      " (sentence\n",
      " (spec (grup.cc (R0 Esto es) (DA0000 un)) (NC00000 texto))\n",
      " (grup.cc\n",
      " (SP000 de)\n",
      " (sadv (grup.adv (NC00000 prueba) (conj (Fc ,) (SP000 sin))))\n",
      " (sp (prep (V0N0000 tokenizar))))))\n",
      "-------- FreeLing -------- \n",
      "\n",
      "\n",
      "(grup-verb_\n",
      " (sn_ (pron-ms_ (pdem-ms_ (Esto_este_PD0NS000 ))))\n",
      " (verb_ (es_ser_VSIP3S0 ))\n",
      " (sn_\n",
      " (espec-ms_ (indef-ms_ (un_uno_DI0MS0 )))\n",
      " (grup-nom-ms_ (n-ms_ (texto_texto_NCMS000 )))\n",
      " (sp-de_\n",
      " (de_de_SPS00 )\n",
      " (sn_ (grup-nom-fs_ (n-fs_ (prueba_prueba_NCFS000 ))))))\n",
      " (Fc_ (,_,_Fc ))\n",
      " (grup-sp-inf_\n",
      " (prep_ (sin_sin_SPS00 ))\n",
      " (grup-verb-inf_\n",
      " (infinitiu_ (inf_ (forma-inf_ (tokenizar_tokenizar_VMN0000 )))))))\n",
      "-------- MaltParser -------- \n",
      "\n",
      "\n",
      "D:\\Matias\\Proyecto-PLN\\tree-tagger-windows-3.2\\TreeTagger\\bin\\tag-spanish.bat c:\\users\\matias\\appdata\\local\\temp\\treetagger_input_ww671m c:\\users\\matias\\appdata\\local\\temp\\treetagger_output_4b5srj\n",
      "(ROOT (de (Esto es (texto (un ))) (prueba (, )) (tokenizar (sin ))))\n"
     ]
    }
   ],
   "source": [
    "from __future__ import print_function\n",
    "\n",
    "import os\n",
    "from inco.nlp.parse.maltparser import MaltParser\n",
    "from inco.nlp.parse.stanford.stanford_shift_reduce import StanfordShiftReduceParser\n",
    "\n",
    "from inco.nlp.tag.freeling import FreeLing as FreeLingTagger\n",
    "from inco.nlp.tag.treetagger import TreeTagger\n",
    "from inco.nlp.tokenize.freeling import FreeLing as FreeLingTokenizer\n",
    "from inco.nlp.parse.freeling import FreeLing as FreeLingParser\n",
    "\n",
    "# Raw, untokenized input sentence.\n",
    "text = u\"Esto es un texto de prueba, sin tokenizar\"\n",
    "\n",
    "# Every external tool is located through an environment variable. Check all of\n",
    "# them up front so a misconfigured machine gets a single message listing every\n",
    "# missing variable, rather than a KeyError for just the first one.\n",
    "required_env_vars = [\n",
    "    'NLP_TREETAGGER',\n",
    "    'NLP_FREELING',\n",
    "    'NLP_STANFORDSR',\n",
    "    'NLP_STANFORDSR_MODEL',\n",
    "    'NLP_MALTPARSER',\n",
    "    'NLP_MALTPARSER_MODEL',\n",
    "]\n",
    "missing_env_vars = [var for var in required_env_vars if var not in os.environ]\n",
    "if missing_env_vars:\n",
    "    raise KeyError('Missing environment variable(s): ' + ', '.join(missing_env_vars))\n",
    "\n",
    "path_treetagger = os.environ['NLP_TREETAGGER']\n",
    "path_freeling = os.environ['NLP_FREELING']\n",
    "path_stanford_sr = os.environ['NLP_STANFORDSR']\n",
    "path_stanford_sr_model = os.environ['NLP_STANFORDSR_MODEL']\n",
    "path_maltparser = os.environ['NLP_MALTPARSER']\n",
    "path_maltparser_model = os.environ['NLP_MALTPARSER_MODEL']\n",
    "\n",
    "# NOTE(review): freeling_tagger is constructed but not used anywhere below.\n",
    "freeling_tagger = FreeLingTagger(path_freeling)\n",
    "freeling_tokenizer = FreeLingTokenizer(path_freeling)\n",
    "freeling_parser = FreeLingParser(path_freeling)\n",
    "\n",
    "# TreeTagger receives the FreeLing tokenizer; the Stanford and Malt parsers\n",
    "# both receive TreeTagger as their tagger.\n",
    "tagger = TreeTagger(path_treetagger, freeling_tokenizer)\n",
    "stanford_sr_parser = StanfordShiftReduceParser(path_stanford_sr, path_stanford_sr_model, tagger)\n",
    "maltparser = MaltParser(path_maltparser, path_maltparser_model, tagger)\n",
    "\n",
    "tokens = freeling_tokenizer.tokenize(text)\n",
    "\n",
    "# Run the same tokens through each parser. The header strings are kept exactly\n",
    "# as they were so the printed output does not change.\n",
    "parsers = [\n",
    "    (\"-------- Stanford -------- \\n\\n\", stanford_sr_parser),\n",
    "    (\"\\n\\n-------- FreeLing -------- \\n\\n\", freeling_parser),\n",
    "    (\"\\n\\n-------- MaltParser -------- \\n\\n\", maltparser),\n",
    "]\n",
    "for header, parser in parsers:\n",
    "    print(header)\n",
    "    # parse() returns an iterator; only its first item is printed. The builtin\n",
    "    # next() is used instead of the .next() method so this also runs on Python 3.\n",
    "    print(next(parser.parse(tokens)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    ""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2.0
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}