{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# SMASAC - NER with displaCy\n", "\n", "Before starting, download the required model:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "! python -m spacy download en_core_web_sm" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import spacy\n", "from spacy import displacy" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "text = \"Las Vegas victim's family wants shooter's assets seized - CNN #SmartNews Emotional \" \\\n", " \"Jimmy Kimmel rips gun-control foes after Vegas shooting The Onion’s Las Vegas Shooting \" \\\n", " \"shooting https://t.co/aTPUnGvz9c mashable Controlled Chaos at Las Vegas Hospital Trauma Center \" \\\n", " \"shooting: Carla and Jae Unser hugged their children… \" \\\n", " \"YouTube changed its search algorithm after reports revealed it was surfacing inaccurate \" \\\n", " \"Stephen Paddock was 'upbeat, happy' as he bought guns\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load the spaCy built-in language model and create the doc. Use en_core_web_sm for efficiency here."
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "nlp = spacy.load('en_core_web_sm')\n", "doc = nlp(text)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Las Vegas 0 9 GPE\n", "CNN 58 61 ORG\n", "SmartNews 63 72 ORG\n", "Jimmy Kimmel 83 95 PERSON\n", "Onion 143 148 ORG\n", "Las Vegas 151 160 GPE\n", "Las Vegas Hospital Trauma Center 232 264 ORG\n", "Carla 275 280 PERSON\n", "YouTube 318 325 ORG\n", "Stephen Paddock 406 421 PERSON\n" ] } ], "source": [ "for ent in doc.ents:\n", " print(ent.text, ent.start_char, ent.end_char, ent.label_)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Start the displaCy server on port 5000 (http://localhost:5000):" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\u001b[93m Serving on port 5000...\u001b[0m\n", " Using the 'ent' visualizer\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "127.0.0.1 - - [22/Apr/2018 14:12:49] \"GET / HTTP/1.1\" 200 4731\n", "127.0.0.1 - - [22/Apr/2018 14:12:50] \"GET /favicon.ico HTTP/1.1\" 200 4731\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " Shutting down server on port 5000.\n", "\n" ] } ], "source": [ "# Start the server, for stopping it, press the stop button in jupyter:\n", "displacy.serve(doc, style='ent')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }