{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from spacy import displacy" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Prepared 22 documents.\n", "They can be accessed using or_texts[n], being n an integer from 0 to 21.\n", "Distribution of documents by language after translation: {'en': 22}\n", "Unique terms found: 3538\n", "Named entities found: 176\n", "Vectors created.\n", "Test: [4 5 0 0 0 2 0 0 0 1 4 4 2 2 1 0 2 4 1 1 1 3]\n", "Reference: [0, 5, 2, 2, 2, 3, 2, 2, 2, 4, 0, 0, 3, 3, 4, 2, 3, 0, 4, 4, 4, 1]\n", "Adjusted Rand Index: 1.0\n" ] } ], "source": [ "%run ./Practica1ObligatoriaRIMT_1819.py" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Rendering named entities visualization of original texts" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "