{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Import Python libraries\n", "from typing import *\n", "import os\n", "#import ibm_watson\n", "#import ibm_watson.natural_language_understanding_v1 as nlu\n", "#import ibm_cloud_sdk_core\n", "import pandas as pd\n", "import spacy\n", "import sys\n", "from IPython.core.display import display, HTML\n", "import textwrap\n", "\n", "# And of course we need the text_extensions_for_pandas library itself.\n", "_PROJECT_ROOT = \"../..\"\n", "try:\n", " import text_extensions_for_pandas as tp\n", "except ModuleNotFoundError as e:\n", " # If we're running from within the project source tree and the parent Python\n", " # environment doesn't have the text_extensions_for_pandas package, use the\n", " # version in the local source tree.\n", " if not os.getcwd().endswith(\"market\"):\n", " raise e\n", " if _PROJECT_ROOT not in sys.path:\n", " sys.path.insert(0, _PROJECT_ROOT)\n", " import text_extensions_for_pandas as tp\n", " \n", "# Download the SpaCy model if necessary\n", "try:\n", " spacy.load(\"en_core_web_trf\")\n", "except IOError:\n", " raise IOError(\"SpaCy dependency parser not found. Please run \"\n", " \"'python -m spacy download en_core_web_trf', then \"\n", " \"restart JupyterLab.\")\n", "\n", "\n", "if \"IBM_API_KEY\" not in os.environ:\n", " raise ValueError(\"IBM_API_KEY environment variable not set. Please create \"\n", " \"a free instance of IBM Watson Natural Language Understanding \"\n", " \"(see https://www.ibm.com/cloud/watson-natural-language-understanding) \"\n", " \"and set the IBM_API_KEY environment variable to your instance's \"\n", " \"API key value.\")\n", "\n", "api_key = os.environ.get(\"IBM_API_KEY\")\n", "service_url = os.environ.get(\"IBM_SERVICE_URL\") \n", "# natural_language_understanding = ibm_watson.NaturalLanguageUnderstandingV1(\n", "# version=\"2021-01-01\",\n", "# authenticator=ibm_cloud_sdk_core.authenticators.IAMAuthenticator(api_key)\n", "# )\n", "# natural_language_understanding.set_service_url(service_url)\n", "\n", "# Github notebook gists will be this wide: ------------------>\n", "# Screenshots of this notebook should be this wide: ----------------------------->" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# Code from the Github gist at https://gist.github.com/frreiss/038ac63ef20eed323a5637f9ddb2de8d\n", "# Be sure to update this cell if the gist changes!\n", "\n", "import pandas as pd\n", "import text_extensions_for_pandas as tp\n", "import ibm_watson\n", "import ibm_watson.natural_language_understanding_v1 as nlu\n", "import ibm_cloud_sdk_core\n", "\n", "def find_persons_quoted_by_name(doc_url, api_key, service_url) -> pd.DataFrame:\n", " # Ask Watson Natural Language Understanding to run its \"semantic_roles\"\n", " # and \"entities\" models.\n", " natural_language_understanding = ibm_watson.NaturalLanguageUnderstandingV1(\n", " version=\"2021-01-01\",\n", " authenticator=ibm_cloud_sdk_core.authenticators.IAMAuthenticator(api_key)\n", " )\n", " natural_language_understanding.set_service_url(service_url)\n", " nlu_results = natural_language_understanding.analyze(\n", " url=doc_url,\n", " return_analyzed_text=True,\n", " features=nlu.Features(\n", " entities=nlu.EntitiesOptions(mentions=True),\n", " semantic_roles=nlu.SemanticRolesOptions())).get_result()\n", " \n", " # Convert the output of Watson Natural Language Understanding to DataFrames.\n", " dataframes = tp.io.watson.nlu.parse_response(nlu_results)\n", " entity_mentions_df = dataframes[\"entity_mentions\"]\n", " semantic_roles_df = dataframes[\"semantic_roles\"]\n", " \n", " # Extract mentions of person names and company names\n", " person_mentions_df = entity_mentions_df[entity_mentions_df[\"type\"] == \"Person\"]\n", " \n", " # Extract instances of subjects that made statements\n", " quotes_df = semantic_roles_df[semantic_roles_df[\"action.normalized\"] == \"say\"]\n", " subjects_df = quotes_df[[\"subject.text\"]].copy().reset_index(drop=True)\n", " \n", " # Retrieve the full document text from the entity mentions output.\n", " doc_text = entity_mentions_df[\"span\"].array.document_text\n", "\n", " # Filter down to just the rows and columns we're interested in\n", " subjects_df = quotes_df[[\"subject.text\"]].copy().reset_index(drop=True)\n", "\n", " # Use String.index() to find where the strings in \"subject.text\" begin\n", " subjects_df[\"begin\"] = pd.Series(\n", " [doc_text.index(s) for s in subjects_df[\"subject.text\"]], dtype=int)\n", "\n", " # Compute end offsets and wrap the <begin, end, text> triples in a SpanArray column\n", " subjects_df[\"end\"] = subjects_df[\"begin\"] + subjects_df[\"subject.text\"].str.len()\n", " subjects_df[\"span\"] = tp.SpanArray(doc_text, subjects_df[\"begin\"], subjects_df[\"end\"])\n", "\n", " # Align subjects with person names\n", " execs_df = tp.spanner.contain_join(subjects_df[\"span\"], \n", " person_mentions_df[\"span\"],\n", " \"subject\", \"person\")\n", " # Add on the document URL.\n", " execs_df[\"url\"] = doc_url\n", " return execs_df[[\"person\", \"url\"]]\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Part 2: Using Pandas DataFrames to analyze sentence structure" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "*In this article, we show how to use Pandas DataFrames to extract useful structure from the parse trees of English-language sentences.*\n", "\n", "*Dependency parsing* is a natural language processing technique that identifies the relationships between the words that make up a sentence. We can treat these relationships as the edges of a graph. \n", "\n", "For example, here's the graph that a dependency parser produces for the sentence, \"I like natural language processing\":\n", "" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"3dec518c49f948dd96038d8f6f52cbd6-0\" class=\"displacy\" width=\"1100\" height=\"399.5\" direction=\"ltr\" style=\"max-width: none; height: 399.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">I</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">PRP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">like</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">VBP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">natural</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">JJ</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">language</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">NN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">processing</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\">NN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"309.5\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">.</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">.</tspan>\n", "</text>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-3dec518c49f948dd96038d8f6f52cbd6-0-0\" stroke-width=\"2px\" d=\"M70,264.5 C70,177.0 215.0,177.0 215.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-3dec518c49f948dd96038d8f6f52cbd6-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M70,266.5 L62,254.5 78,254.5\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-3dec518c49f948dd96038d8f6f52cbd6-0-1\" stroke-width=\"2px\" d=\"M420,264.5 C420,177.0 565.0,177.0 565.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-3dec518c49f948dd96038d8f6f52cbd6-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M420,266.5 L412,254.5 428,254.5\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-3dec518c49f948dd96038d8f6f52cbd6-0-2\" stroke-width=\"2px\" d=\"M595,264.5 C595,177.0 740.0,177.0 740.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-3dec518c49f948dd96038d8f6f52cbd6-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M595,266.5 L587,254.5 603,254.5\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-3dec518c49f948dd96038d8f6f52cbd6-0-3\" stroke-width=\"2px\" d=\"M245,264.5 C245,89.5 745.0,89.5 745.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-3dec518c49f948dd96038d8f6f52cbd6-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M745.0,266.5 L753.0,254.5 737.0,254.5\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-3dec518c49f948dd96038d8f6f52cbd6-0-4\" stroke-width=\"2px\" d=\"M245,264.5 C245,2.0 925.0,2.0 925.0,264.5\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-3dec518c49f948dd96038d8f6f52cbd6-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M925.0,266.5 L933.0,254.5 917.0,254.5\" fill=\"currentColor\"/>\n", "</g>\n", "</svg></span>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Do not include this cell in the blog post.\n", "# Code to generate the above image\n", "import spacy\n", "\n", "spacy_language_model = spacy.load(\"en_core_web_trf\")\n", "token_features = tp.io.spacy.make_tokens_and_features(\n", " \"I like natural language processing.\", spacy_language_model)\n", "tp.io.spacy.render_parse_tree(token_features)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This graph is always a tree, so we call it the *dependency-based parse tree* of the sentence. We often shorten the phrase \"dependency-based parse tree\" to **dependency parse** or **parse tree**.\n", "\n", "Every word in the sentence (including the period at the end) becomes a node of the parse tree:\n", "\n", "\n", "The most important verb in the sentence\n", "becomes the root of the tree. We call this root node the *head* node. In this example, the head node is the verb \"like\".\n", "\n", "Edges in the tree connect pairs of related words:\n", "\n", "\n", "Each edge is tagged with information about why the words are related. For example, the first two words in the sentence, \"I\" and \"like\", have an `nsubj` relationship. The pronoun \"I\" is the subject for the verb \"like\".\n", "\n", "Dependency parsing is useful because it lets you solve problems with very little code. The parser acts as a universal machine learning model, extracting many facts at once from the text. Pattern matching over the parse tree lets you filter this set of facts down to the ones that are relevant to your application." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# An enterprise application of dependency parsing\n", "\n", "In a [previous article](https://medium.com/@fred.reiss/market-intelligence-with-pandas-and-ibm-watson-natural-language-understanding-a939323a31ea), we showed how to use [Watson Natural Language Understanding](https://www.ibm.com/cloud/watson-natural-language-understanding?cm_mmc=open_source_technology) to find places where a press release quotes an executive by name. In this article, we'll use dependency parsing to associate those names with **job titles**.\n", "\n", "A person's job title is a valuable piece of context. The title can tell you whether the person is an important decision maker. Titles can tell you relationship between different employees at a company. By looking at how titles change over time, you can reconstruct a person's job history." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "45 tokens\n" ] }, { "data": { "text/html": [ "<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"78c331ff9312413c8f8896fb916f5663-0\" class=\"displacy\" width=\"7925\" height=\"1012.0\" direction=\"ltr\" style=\"max-width: none; height: 1012.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">"</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">``</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">By</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">IN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">combining</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\">VBG</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">the</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">DT</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">power</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\">NN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">of</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">IN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1100\">AI</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1100\">NNP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1275\">with</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1275\">IN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1450\">the</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1450\">DT</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1625\">flexibility</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1625\">NN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1800\">and</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1800\">CC</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1975\">agility</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1975\">NN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2150\">of</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2150\">IN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2325\">hybrid</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2325\">JJ</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2500\">cloud</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2500\">NN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2675\">,</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2675\">,</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2850\">our</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2850\">PRP$</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3025\">clients</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3025\">NNS</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3200\">are</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3200\">VBP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3375\">driving</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3375\">VBG</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3550\">innovation</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3550\">NN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3725\">and</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3725\">CC</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"3900\">digitizing</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"3900\">VBG</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4075\">their</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4075\">PRP$</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4250\">operations</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4250\">NNS</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4425\">at</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4425\">IN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4600\">a</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4600\">DT</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4775\">fast</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4775\">JJ</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"4950\">pace</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"4950\">NN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5125\">,</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5125\">,</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5300\">"</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5300\">,</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5475\">said</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5475\">VBD</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5650\"> </tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5650\">_SP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"5825\">Daniel</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"5825\">NNP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"6000\">Hernandez</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"6000\">NNP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"6175\">,</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"6175\">,</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"6350\">general</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"6350\">JJ</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"6525\">manager</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"6525\">NN</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"6700\">,</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"6700\">,</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"6875\">Data</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"6875\">NNP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"7050\">and</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"7050\">CC</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"7225\">AI</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"7225\">NNP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"7400\">,</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"7400\">,</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"7575\">IBM</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"7575\">NNP</tspan>\n", "</text>\n", "\n", "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"922.0\">\n", " <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"7750\">.</tspan>\n", " <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"7750\">.</tspan>\n", "</text>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-0\" stroke-width=\"2px\" d=\"M70,877.0 C70,2.0 5475.0,2.0 5475.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M70,879.0 L62,867.0 78,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-1\" stroke-width=\"2px\" d=\"M245,877.0 C245,89.5 3370.0,89.5 3370.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M245,879.0 L237,867.0 253,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-2\" stroke-width=\"2px\" d=\"M245,877.0 C245,789.5 355.0,789.5 355.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pcomp</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M355.0,879.0 L363.0,867.0 347.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-3\" stroke-width=\"2px\" d=\"M595,877.0 C595,789.5 705.0,789.5 705.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M595,879.0 L587,867.0 603,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-4\" stroke-width=\"2px\" d=\"M420,877.0 C420,702.0 710.0,702.0 710.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M710.0,879.0 L718.0,867.0 702.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-5\" stroke-width=\"2px\" d=\"M770,877.0 C770,789.5 880.0,789.5 880.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M880.0,879.0 L888.0,867.0 872.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-6\" stroke-width=\"2px\" d=\"M945,877.0 C945,789.5 1055.0,789.5 1055.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-6\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M1055.0,879.0 L1063.0,867.0 1047.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-7\" stroke-width=\"2px\" d=\"M420,877.0 C420,439.5 1250.0,439.5 1250.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-7\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M1250.0,879.0 L1258.0,867.0 1242.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-8\" stroke-width=\"2px\" d=\"M1470,877.0 C1470,789.5 1580.0,789.5 1580.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-8\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M1470,879.0 L1462,867.0 1478,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-9\" stroke-width=\"2px\" d=\"M1295,877.0 C1295,702.0 1585.0,702.0 1585.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-9\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M1585.0,879.0 L1593.0,867.0 1577.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-10\" stroke-width=\"2px\" d=\"M1645,877.0 C1645,789.5 1755.0,789.5 1755.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-10\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cc</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M1755.0,879.0 L1763.0,867.0 1747.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-11\" stroke-width=\"2px\" d=\"M1645,877.0 C1645,702.0 1935.0,702.0 1935.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-11\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">conj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M1935.0,879.0 L1943.0,867.0 1927.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-12\" stroke-width=\"2px\" d=\"M1645,877.0 C1645,614.5 2115.0,614.5 2115.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-12\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M2115.0,879.0 L2123.0,867.0 2107.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-13\" stroke-width=\"2px\" d=\"M2345,877.0 C2345,789.5 2455.0,789.5 2455.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-13\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M2345,879.0 L2337,867.0 2353,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-14\" stroke-width=\"2px\" d=\"M2170,877.0 C2170,702.0 2460.0,702.0 2460.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-14\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M2460.0,879.0 L2468.0,867.0 2452.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-15\" stroke-width=\"2px\" d=\"M2695,877.0 C2695,527.0 3345.0,527.0 3345.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-15\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M2695,879.0 L2687,867.0 2703,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-16\" stroke-width=\"2px\" d=\"M2870,877.0 C2870,789.5 2980.0,789.5 2980.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-16\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">poss</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M2870,879.0 L2862,867.0 2878,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-17\" stroke-width=\"2px\" d=\"M3045,877.0 C3045,702.0 3335.0,702.0 3335.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-17\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M3045,879.0 L3037,867.0 3053,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-18\" stroke-width=\"2px\" d=\"M3220,877.0 C3220,789.5 3330.0,789.5 3330.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-18\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">aux</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M3220,879.0 L3212,867.0 3228,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-19\" stroke-width=\"2px\" d=\"M3395,877.0 C3395,264.5 5460.0,264.5 5460.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-19\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">ccomp</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M3395,879.0 L3387,867.0 3403,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-20\" stroke-width=\"2px\" d=\"M3395,877.0 C3395,789.5 3505.0,789.5 3505.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-20\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M3505.0,879.0 L3513.0,867.0 3497.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-21\" stroke-width=\"2px\" d=\"M3395,877.0 C3395,702.0 3685.0,702.0 3685.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-21\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cc</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M3685.0,879.0 L3693.0,867.0 3677.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-22\" stroke-width=\"2px\" d=\"M3395,877.0 C3395,614.5 3865.0,614.5 3865.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-22\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">conj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M3865.0,879.0 L3873.0,867.0 3857.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-23\" stroke-width=\"2px\" d=\"M4095,877.0 C4095,789.5 4205.0,789.5 4205.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-23\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">poss</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M4095,879.0 L4087,867.0 4103,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-24\" stroke-width=\"2px\" d=\"M3920,877.0 C3920,702.0 4210.0,702.0 4210.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-24\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">dobj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M4210.0,879.0 L4218.0,867.0 4202.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-25\" stroke-width=\"2px\" d=\"M3920,877.0 C3920,614.5 4390.0,614.5 4390.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-25\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M4390.0,879.0 L4398.0,867.0 4382.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-26\" stroke-width=\"2px\" d=\"M4620,877.0 C4620,702.0 4910.0,702.0 4910.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-26\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M4620,879.0 L4612,867.0 4628,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-27\" stroke-width=\"2px\" d=\"M4795,877.0 C4795,789.5 4905.0,789.5 4905.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-27\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M4795,879.0 L4787,867.0 4803,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-28\" stroke-width=\"2px\" d=\"M4445,877.0 C4445,614.5 4915.0,614.5 4915.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-28\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M4915.0,879.0 L4923.0,867.0 4907.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-29\" stroke-width=\"2px\" d=\"M5145,877.0 C5145,702.0 5435.0,702.0 5435.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-29\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M5145,879.0 L5137,867.0 5153,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-30\" stroke-width=\"2px\" d=\"M5320,877.0 C5320,789.5 5430.0,789.5 5430.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-30\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M5320,879.0 L5312,867.0 5328,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-31\" stroke-width=\"2px\" d=\"M5495,877.0 C5495,789.5 5605.0,789.5 5605.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-31\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M5605.0,879.0 L5613.0,867.0 5597.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-32\" stroke-width=\"2px\" d=\"M5845,877.0 C5845,789.5 5955.0,789.5 5955.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-32\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">compound</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M5845,879.0 L5837,867.0 5853,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-33\" stroke-width=\"2px\" d=\"M5495,877.0 C5495,614.5 5965.0,614.5 5965.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-33\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M5965.0,879.0 L5973.0,867.0 5957.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-34\" stroke-width=\"2px\" d=\"M6020,877.0 C6020,789.5 6130.0,789.5 6130.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-34\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M6130.0,879.0 L6138.0,867.0 6122.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-35\" stroke-width=\"2px\" d=\"M6370,877.0 C6370,789.5 6480.0,789.5 6480.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-35\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M6370,879.0 L6362,867.0 6378,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-36\" stroke-width=\"2px\" d=\"M6020,877.0 C6020,614.5 6490.0,614.5 6490.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-36\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">appos</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M6490.0,879.0 L6498.0,867.0 6482.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-37\" stroke-width=\"2px\" d=\"M6545,877.0 C6545,789.5 6655.0,789.5 6655.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-37\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M6655.0,879.0 L6663.0,867.0 6647.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-38\" stroke-width=\"2px\" d=\"M6545,877.0 C6545,702.0 6835.0,702.0 6835.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-38\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">appos</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M6835.0,879.0 L6843.0,867.0 6827.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-39\" stroke-width=\"2px\" d=\"M6895,877.0 C6895,789.5 7005.0,789.5 7005.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-39\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">cc</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M7005.0,879.0 L7013.0,867.0 6997.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-40\" stroke-width=\"2px\" d=\"M6895,877.0 C6895,702.0 7185.0,702.0 7185.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-40\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">conj</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M7185.0,879.0 L7193.0,867.0 7177.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-41\" stroke-width=\"2px\" d=\"M6545,877.0 C6545,439.5 7375.0,439.5 7375.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-41\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M7375.0,879.0 L7383.0,867.0 7367.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-42\" stroke-width=\"2px\" d=\"M6545,877.0 C6545,352.0 7555.0,352.0 7555.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-42\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">appos</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M7555.0,879.0 L7563.0,867.0 7547.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "\n", "<g class=\"displacy-arrow\">\n", " <path class=\"displacy-arc\" id=\"arrow-78c331ff9312413c8f8896fb916f5663-0-43\" stroke-width=\"2px\" d=\"M5495,877.0 C5495,177.0 7740.0,177.0 7740.0,877.0\" fill=\"none\" stroke=\"currentColor\"/>\n", " <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n", " <textPath xlink:href=\"#arrow-78c331ff9312413c8f8896fb916f5663-0-43\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">punct</textPath>\n", " </text>\n", " <path class=\"displacy-arrowhead\" d=\"M7740.0,879.0 L7748.0,867.0 7732.0,867.0\" fill=\"currentColor\"/>\n", "</g>\n", "</svg></span>" ], "text/plain": [ "<IPython.core.display.HTML object>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Don't include this cell in the blog\n", "\n", "# Code to generate parse tree of entire sentence\n", "# Take a screenshot at 25% to create the png version.\n", "quote_text = '''\\\n", "\"By combining the power of AI with the flexibility and agility of hybrid cloud, \\\n", "our clients are driving innovation and digitizing their operations at a fast \\\n", "pace,\" said Daniel Hernandez, general manager, Data and AI, IBM.'''\n", "\n", "tokens = tp.io.spacy.make_tokens_and_features(quote_text, spacy_language_model)\n", "print(f\"{len(tokens.index)} tokens\")\n", "tp.io.spacy.render_parse_tree(tokens)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here's an example of how names and job titles can appear in press releases. This example is from an [IBM press release](https://newsroom.ibm.com/2020-12-02-IBM-Named-a-Leader-in-the-2020-IDC-MarketScape-For-Worldwide-Advanced-Machine-Learning-Software-Platform) from December 2020:\n", "\n", "\n", "\n", "This sentence is 45 words long, so the entire parse tree is a bit daunting...\n", "\n", "\n", "\n", "...but if we zoom in on just the phrase, \"Daniel Hernandez, general manager, Data and AI, IBM,\" some structure becomes clear:\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The arrows in the diagram point \"downwards\", from root to leaf. The entire job title is a child of the name. There's a single edge from the head (highest) node of Daniel Hernandez's name to the head node of his job title. \n", "\n", "The edge types in this parse tree come from the [Universal Dependencies](https://universaldependencies.org/) framework. The edge between the name and job title has the type `appos`. `appos` is short for \"[appositional modifier](https://universaldependencies.org/docs/en/dep/appos.html)\", or [appositive](https://owl.purdue.edu/owl/general_writing/grammar/appositives.html). An appositive is a noun that describes another noun. In this case, the noun phrase \"general manager, Data and AI, IBM\" describes the noun phrase \"Daniel Hernandez\".\n", "\n", "The pattern in the picture above happens whenever a person's job title is an appositive for that person's name. The title will be below the name in the tree, and the head nodes of the name and title will be connected by an `appos` edge. We can use this pattern to find the job title via a three-step process:\n", "\n", "1. Look for an `appos` edge coming out of any of the parse tree nodes for the name.\n", "2. The node at the other end of this edge should be the head node of the job title.\n", "3. Find all the other nodes that are reachable from the head node of the job title.\n", "\n", "Remember that each node represents a word. Once you know all the nodes that make up the job title, you know all the words in the title.\n", "\n", "Step 3 here requires a [*transitive closure*](https://en.wikipedia.org/wiki/Transitive_closure) operation:\n", "* Start with a set of nodes consisting of just the head node\n", "* Look for nodes that are connected to nodes of the set. Add those nodes to the set.\n", "* Repeat the previous step until your set of nodes stops growing.\n", "\n", "We can implement this algorithm with Pandas DataFrames." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Transitive closure with Pandas\n", "\n", "We're going to use Pandas to match person names with job titles. The first thing we'll need is the locations of the person names. In our previous post, we created a function `find_persons_quoted_by_name()` that finds all the people that a news article quotes by name. If you're curious, you can find the source code [here](https://gist.github.com/frreiss/038ac63ef20eed323a5637f9ddb2de8d). The function produces a DataFrame with the location of each person name. Here's the output when you run the function over an [example press release](https://newsroom.ibm.com/2020-12-02-IBM-Named-a-Leader-in-the-2020-IDC-MarketScape-For-Worldwide-Advanced-Machine-Learning-Software-Platform):" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>person</th>\n", " <th>url</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>https://newsroom.ibm.com/2020-12-02-IBM-Named-...</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>https://newsroom.ibm.com/2020-12-02-IBM-Named-...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>https://newsroom.ibm.com/2020-12-02-IBM-Named-...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " person \\\n", "0 [1288, 1304): 'Daniel Hernandez' \n", "1 [1838, 1849): 'Curren Katz' \n", "2 [2476, 2486): 'Ritu Jyoti' \n", "\n", " url \n", "0 https://newsroom.ibm.com/2020-12-02-IBM-Named-... \n", "1 https://newsroom.ibm.com/2020-12-02-IBM-Named-... \n", "2 https://newsroom.ibm.com/2020-12-02-IBM-Named-... " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "doc_url = \"https://newsroom.ibm.com/2020-12-02-IBM-Named-a-Leader-in-the-2020-IDC-MarketScape-For-Worldwide-Advanced-Machine-Learning-Software-Platform\"\n", "persons = find_persons_quoted_by_name(doc_url, api_key, \n", " service_url)\n", "persons" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The second thing we will need is a parse tree. We'll use the dependency parser from the [SpaCy](https://spacy.io) NLP library. Our open source library [Text Extensions for Pandas](https://ibm.biz/text-extensions-for-pandas) can convert the output of this parser into a DataFrame:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>id</th>\n", " <th>span</th>\n", " <th>dep</th>\n", " <th>head</th>\n", " <th>sentence</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0</td>\n", " <td>[0, 6): 'ARMONK'</td>\n", " <td>ROOT</td>\n", " <td>0</td>\n", " <td>[0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi...</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>[6, 7): ','</td>\n", " <td>punct</td>\n", " <td>0</td>\n", " <td>[0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2</td>\n", " <td>[8, 12): 'N.Y.'</td>\n", " <td>appos</td>\n", " <td>0</td>\n", " <td>[0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi...</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>3</td>\n", " <td>[12, 13): ','</td>\n", " <td>punct</td>\n", " <td>0</td>\n", " <td>[0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi...</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>4</td>\n", " <td>[14, 18): 'Dec.'</td>\n", " <td>npadvmod</td>\n", " <td>0</td>\n", " <td>[0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi...</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>761</th>\n", " <td>761</td>\n", " <td>[4248, 4266): 'tballen@us.ibm.com'</td>\n", " <td>appos</td>\n", " <td>751</td>\n", " <td>[4196, 4278): 'Media Contact: Tyler Allen IBM ...</td>\n", " </tr>\n", " <tr>\n", " <th>762</th>\n", " <td>762</td>\n", " <td>[4266, 4267): ''</td>\n", " <td>punct</td>\n", " <td>751</td>\n", " <td>[4196, 4278): 'Media Contact: Tyler Allen IBM ...</td>\n", " </tr>\n", " <tr>\n", " <th>763</th>\n", " <td>763</td>\n", " <td>[4267, 4273): 'SOURCE'</td>\n", " <td>appos</td>\n", " <td>751</td>\n", " <td>[4196, 4278): 'Media Contact: Tyler Allen IBM ...</td>\n", " </tr>\n", " <tr>\n", " <th>764</th>\n", " <td>764</td>\n", " <td>[4274, 4277): 'IBM'</td>\n", " <td>appos</td>\n", " <td>763</td>\n", " <td>[4196, 4278): 'Media Contact: Tyler Allen IBM ...</td>\n", " </tr>\n", " <tr>\n", " <th>765</th>\n", " <td>765</td>\n", " <td>[4277, 4278): ''</td>\n", " <td>punct</td>\n", " <td>751</td>\n", " <td>[4196, 4278): 'Media Contact: Tyler Allen IBM ...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>766 rows × 5 columns</p>\n", "</div>" ], "text/plain": [ " id span dep head \\\n", "0 0 [0, 6): 'ARMONK' ROOT 0 \n", "1 1 [6, 7): ',' punct 0 \n", "2 2 [8, 12): 'N.Y.' appos 0 \n", "3 3 [12, 13): ',' punct 0 \n", "4 4 [14, 18): 'Dec.' npadvmod 0 \n", ".. ... ... ... ... \n", "761 761 [4248, 4266): 'tballen@us.ibm.com' appos 751 \n", "762 762 [4266, 4267): '' punct 751 \n", "763 763 [4267, 4273): 'SOURCE' appos 751 \n", "764 764 [4274, 4277): 'IBM' appos 763 \n", "765 765 [4277, 4278): '' punct 751 \n", "\n", " sentence \n", "0 [0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi... \n", "1 [0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi... \n", "2 [0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi... \n", "3 [0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi... \n", "4 [0, 42): 'ARMONK, N.Y., Dec. 2, 2020 /PRNewswi... \n", ".. ... \n", "761 [4196, 4278): 'Media Contact: Tyler Allen IBM ... \n", "762 [4196, 4278): 'Media Contact: Tyler Allen IBM ... \n", "763 [4196, 4278): 'Media Contact: Tyler Allen IBM ... \n", "764 [4196, 4278): 'Media Contact: Tyler Allen IBM ... \n", "765 [4196, 4278): 'Media Contact: Tyler Allen IBM ... \n", "\n", "[766 rows x 5 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import spacy\n", "import text_extensions_for_pandas as tp\n", "\n", "# The original document had HTML tags. Get the detagged text.\n", "doc_text = persons[\"person\"].array.document_text\n", "\n", "# Run dependency parsing and convert the parse to a DataFrame.\n", "spacy_language_model = spacy.load(\"en_core_web_trf\")\n", "all_token_features = tp.io.spacy.make_tokens_and_features(\n", " doc_text, spacy_language_model)\n", "\n", "# Drop the columns we won't need for this analysis.\n", "tokens = all_token_features[[\"id\", \"span\", \"dep\", \"head\", \n", " \"sentence\"]]\n", "tokens" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This `tokens` DataFrame contains one row for every *token* in the document. The term \"token\" here refers to a part of the document that is a word, an abbreviation, or a piece of punctuation. The columns \"id\", \"dep\" and \"head\" encode the edges of the parse tree.\n", "\n", "Since we're going to be analyzing the parse tree, it's more convenient to have the nodes and edges in separate DataFrames. So let's split `tokens` into DataFrames of nodes and edges:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "nodes = tokens[[\"id\", \"span\"]].reset_index(drop=True)\n", "edges = tokens[[\"id\", \"head\", \"dep\"]].reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>id</th>\n", " <th>span</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0</td>\n", " <td>[0, 6): 'ARMONK'</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>[6, 7): ','</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2</td>\n", " <td>[8, 12): 'N.Y.'</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>3</td>\n", " <td>[12, 13): ','</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>4</td>\n", " <td>[14, 18): 'Dec.'</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>761</th>\n", " <td>761</td>\n", " <td>[4248, 4266): 'tballen@us.ibm.com'</td>\n", " </tr>\n", " <tr>\n", " <th>762</th>\n", " <td>762</td>\n", " <td>[4266, 4267): ''</td>\n", " </tr>\n", " <tr>\n", " <th>763</th>\n", " <td>763</td>\n", " <td>[4267, 4273): 'SOURCE'</td>\n", " </tr>\n", " <tr>\n", " <th>764</th>\n", " <td>764</td>\n", " <td>[4274, 4277): 'IBM'</td>\n", " </tr>\n", " <tr>\n", " <th>765</th>\n", " <td>765</td>\n", " <td>[4277, 4278): ''</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>766 rows × 2 columns</p>\n", "</div>" ], "text/plain": [ " id span\n", "0 0 [0, 6): 'ARMONK'\n", "1 1 [6, 7): ','\n", "2 2 [8, 12): 'N.Y.'\n", "3 3 [12, 13): ','\n", "4 4 [14, 18): 'Dec.'\n", ".. ... ...\n", "761 761 [4248, 4266): 'tballen@us.ibm.com'\n", "762 762 [4266, 4267): ''\n", "763 763 [4267, 4273): 'SOURCE'\n", "764 764 [4274, 4277): 'IBM'\n", "765 765 [4277, 4278): ''\n", "\n", "[766 rows x 2 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nodes" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>id</th>\n", " <th>head</th>\n", " <th>dep</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>ROOT</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>punct</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2</td>\n", " <td>0</td>\n", " <td>appos</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>3</td>\n", " <td>0</td>\n", " <td>punct</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>4</td>\n", " <td>0</td>\n", " <td>npadvmod</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>761</th>\n", " <td>761</td>\n", " <td>751</td>\n", " <td>appos</td>\n", " </tr>\n", " <tr>\n", " <th>762</th>\n", " <td>762</td>\n", " <td>751</td>\n", " <td>punct</td>\n", " </tr>\n", " <tr>\n", " <th>763</th>\n", " <td>763</td>\n", " <td>751</td>\n", " <td>appos</td>\n", " </tr>\n", " <tr>\n", " <th>764</th>\n", " <td>764</td>\n", " <td>763</td>\n", " <td>appos</td>\n", " </tr>\n", " <tr>\n", " <th>765</th>\n", " <td>765</td>\n", " <td>751</td>\n", " <td>punct</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>766 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " id head dep\n", "0 0 0 ROOT\n", "1 1 0 punct\n", "2 2 0 appos\n", "3 3 0 punct\n", "4 4 0 npadvmod\n", ".. ... ... ...\n", "761 761 751 appos\n", "762 762 751 punct\n", "763 763 751 appos\n", "764 764 763 appos\n", "765 765 751 punct\n", "\n", "[766 rows x 3 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "edges" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will start with the nodes that are parts of person names. To find these nodes, we need to match the person names in `person` with tokens in `nodes`.\n", "\n", "The \"person\" column of `persons` and the \"span\" column in `nodes` both hold *span* data. Spans are a common concept in natural language processing. A span represents a region of the document, usually as begin and end offsets and a reference to the document's text. The span data in these two DataFrames is stored using the `SpanDtype` extension type from Text Extensions for Pandas.\n", "\n", "Text Extensions for Pandas also includes functions for manipulating span data. We can use one of these functions, `overlap_join()`, to find all the places where a token from `nodes` overlaps with a person name from `persons`:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>person</th>\n", " <th>span</th>\n", " <th>id</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>[1288, 1294): 'Daniel'</td>\n", " <td>233</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>[1295, 1304): 'Hernandez'</td>\n", " <td>234</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>[1838, 1844): 'Curren'</td>\n", " <td>335</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>[1845, 1849): 'Katz'</td>\n", " <td>336</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>[2476, 2480): 'Ritu'</td>\n", " <td>462</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>[2481, 2486): 'Jyoti'</td>\n", " <td>463</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " person span id\n", "0 [1288, 1304): 'Daniel Hernandez' [1288, 1294): 'Daniel' 233\n", "1 [1288, 1304): 'Daniel Hernandez' [1295, 1304): 'Hernandez' 234\n", "2 [1838, 1849): 'Curren Katz' [1838, 1844): 'Curren' 335\n", "3 [1838, 1849): 'Curren Katz' [1845, 1849): 'Katz' 336\n", "4 [2476, 2486): 'Ritu Jyoti' [2476, 2480): 'Ritu' 462\n", "5 [2476, 2486): 'Ritu Jyoti' [2481, 2486): 'Jyoti' 463" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "person_nodes = (\n", " tp.spanner.overlap_join(persons[\"person\"], nodes[\"span\"],\n", " \"person\", \"span\")\n", " .merge(nodes)\n", ")\n", "person_nodes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This set of nodes defines a starting point for navigating the parse tree. Now we need to look for nodes that are on the other side of an `appos` link. Since the nodes and edges of our graph are Pandas DataFrames, we can use the Pandas `merge()` method to match edges with nodes and walk the graph. Here's a function that finds all the nodes that are one edge away from the nodes in its argument `start_nodes`:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def traverse_edges_once(start_nodes: pd.DataFrame, \n", " edges: pd.DataFrame,\n", " metadata_cols = [\"person\"]) -> pd.DataFrame:\n", " return (\n", " start_nodes[[\"person\", \"id\"]] # Propagate original \"person\" span\n", " .merge(edges, left_on=\"id\", right_on=\"head\", \n", " suffixes=[\"_head\", \"\"])[[\"person\", \"id\"]]\n", " .merge(nodes)\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we can find all the nodes that are reachable by traversing an `appos` link downward from part of a person name:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>person</th>\n", " <th>id</th>\n", " <th>span</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>237</td>\n", " <td>[1314, 1321): 'manager'</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>338</td>\n", " <td>[1851, 1859): 'Director'</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>467</td>\n", " <td>[2501, 2510): 'president'</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " person id span\n", "0 [1288, 1304): 'Daniel Hernandez' 237 [1314, 1321): 'manager'\n", "1 [1838, 1849): 'Curren Katz' 338 [1851, 1859): 'Director'\n", "2 [2476, 2486): 'Ritu Jyoti' 467 [2501, 2510): 'president'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "appos_targets = \\\n", " traverse_edges_once(person_nodes, \n", " edges[edges[\"dep\"] == \"appos\"])\n", "appos_targets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Each element of the \"span\" column of `appos_targets` holds the head node of a person's title. To find the remaining nodes of the titles, we'll do the transitive closure operation we described earlier. We use a Pandas DataFrame to store our set of selected nodes. We use the `traverse_edges_once` function to perform each step of walking the tree. Then we use `Pandas.concat()` and `DataFrame.drop_duplicates()` to add the new nodes to our selected set of nodes. The entire algorithm looks like this:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>person</th>\n", " <th>id</th>\n", " <th>span</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>237</td>\n", " <td>[1314, 1321): 'manager'</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>338</td>\n", " <td>[1851, 1859): 'Director'</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>467</td>\n", " <td>[2501, 2510): 'president'</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>236</td>\n", " <td>[1306, 1313): 'general'</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>238</td>\n", " <td>[1321, 1322): ','</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>239</td>\n", " <td>[1323, 1327): 'Data'</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>242</td>\n", " <td>[1334, 1335): ','</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>243</td>\n", " <td>[1336, 1339): 'IBM'</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>339</td>\n", " <td>[1860, 1862): 'of'</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>343</td>\n", " <td>[1879, 1880): ','</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>345</td>\n", " <td>[1890, 1896): 'Health'</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>465</td>\n", " <td>[2488, 2495): 'program'</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>466</td>\n", " <td>[2496, 2500): 'vice'</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>468</td>\n", " <td>[2510, 2511): ','</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>470</td>\n", " <td>[2515, 2523): 'research'</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>471</td>\n", " <td>[2524, 2528): 'with'</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>240</td>\n", " <td>[1328, 1331): 'and'</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>241</td>\n", " <td>[1332, 1334): 'AI'</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>342</td>\n", " <td>[1876, 1879): 'R&D'</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>344</td>\n", " <td>[1881, 1889): 'Highmark'</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>469</td>\n", " <td>[2512, 2514): 'AI'</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>479</td>\n", " <td>[2573, 2581): 'practice'</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>341</td>\n", " <td>[1868, 1875): 'Science'</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>472</td>\n", " <td>[2529, 2532): 'IDC'</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>476</td>\n", " <td>[2551, 2559): 'research'</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>340</td>\n", " <td>[1863, 1867): 'Data'</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>473</td>\n", " <td>[2532, 2534): ''s'</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>474</td>\n", " <td>[2535, 2543): 'software'</td>\n", " </tr>\n", " <tr>\n", " <th>25</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>475</td>\n", " <td>[2544, 2550): 'market'</td>\n", " </tr>\n", " <tr>\n", " <th>26</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>477</td>\n", " <td>[2560, 2563): 'and'</td>\n", " </tr>\n", " <tr>\n", " <th>27</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>478</td>\n", " <td>[2564, 2572): 'advisory'</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " person id span\n", "0 [1288, 1304): 'Daniel Hernandez' 237 [1314, 1321): 'manager'\n", "1 [1838, 1849): 'Curren Katz' 338 [1851, 1859): 'Director'\n", "2 [2476, 2486): 'Ritu Jyoti' 467 [2501, 2510): 'president'\n", "0 [1288, 1304): 'Daniel Hernandez' 236 [1306, 1313): 'general'\n", "1 [1288, 1304): 'Daniel Hernandez' 238 [1321, 1322): ','\n", "2 [1288, 1304): 'Daniel Hernandez' 239 [1323, 1327): 'Data'\n", "3 [1288, 1304): 'Daniel Hernandez' 242 [1334, 1335): ','\n", "4 [1288, 1304): 'Daniel Hernandez' 243 [1336, 1339): 'IBM'\n", "5 [1838, 1849): 'Curren Katz' 339 [1860, 1862): 'of'\n", "6 [1838, 1849): 'Curren Katz' 343 [1879, 1880): ','\n", "7 [1838, 1849): 'Curren Katz' 345 [1890, 1896): 'Health'\n", "8 [2476, 2486): 'Ritu Jyoti' 465 [2488, 2495): 'program'\n", "9 [2476, 2486): 'Ritu Jyoti' 466 [2496, 2500): 'vice'\n", "10 [2476, 2486): 'Ritu Jyoti' 468 [2510, 2511): ','\n", "11 [2476, 2486): 'Ritu Jyoti' 470 [2515, 2523): 'research'\n", "12 [2476, 2486): 'Ritu Jyoti' 471 [2524, 2528): 'with'\n", "13 [1288, 1304): 'Daniel Hernandez' 240 [1328, 1331): 'and'\n", "14 [1288, 1304): 'Daniel Hernandez' 241 [1332, 1334): 'AI'\n", "15 [1838, 1849): 'Curren Katz' 342 [1876, 1879): 'R&D'\n", "16 [1838, 1849): 'Curren Katz' 344 [1881, 1889): 'Highmark'\n", "17 [2476, 2486): 'Ritu Jyoti' 469 [2512, 2514): 'AI'\n", "18 [2476, 2486): 'Ritu Jyoti' 479 [2573, 2581): 'practice'\n", "19 [1838, 1849): 'Curren Katz' 341 [1868, 1875): 'Science'\n", "20 [2476, 2486): 'Ritu Jyoti' 472 [2529, 2532): 'IDC'\n", "21 [2476, 2486): 'Ritu Jyoti' 476 [2551, 2559): 'research'\n", "22 [1838, 1849): 'Curren Katz' 340 [1863, 1867): 'Data'\n", "23 [2476, 2486): 'Ritu Jyoti' 473 [2532, 2534): ''s'\n", "24 [2476, 2486): 'Ritu Jyoti' 474 [2535, 2543): 'software'\n", "25 [2476, 2486): 'Ritu Jyoti' 475 [2544, 2550): 'market'\n", "26 [2476, 2486): 'Ritu Jyoti' 477 [2560, 2563): 'and'\n", "27 [2476, 2486): 'Ritu Jyoti' 478 [2564, 2572): 'advisory'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Start with the root nodes of the titles.\n", "selected_nodes = appos_targets.copy()\n", "\n", "# Transitive closure. \n", "# Keep going as long as the previous round enlarged our set.\n", "previous_num_nodes = 0\n", "while len(selected_nodes.index) > previous_num_nodes:\n", "\n", " # Find all the nodes that are directly reachable from \n", " # the selected set.\n", " addl_nodes = traverse_edges_once(selected_nodes, edges)\n", "\n", " # Merge the new nodes into the selected set.\n", " previous_num_nodes = len(selected_nodes.index)\n", " selected_nodes = (pd.concat([selected_nodes, addl_nodes])\n", " .drop_duplicates())\n", "\n", "selected_nodes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we know the spans of all the words that make up each job title. The \"addition\" operation \n", "for spans is defined as:\n", "```\n", "span1 + span2 = smallest span that contains both span1 and span2\n", "```\n", "We can recover the span of the entire title by \"adding\" spans using Pandas' `groupby()` method:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>person</th>\n", " <th>title</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>[1306, 1339): 'general manager, Data and AI, IBM'</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>[1851, 1896): 'Director of Data Science R&D, H...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>[2488, 2581): 'program vice president, AI rese...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " person \\\n", "0 [1288, 1304): 'Daniel Hernandez' \n", "1 [1838, 1849): 'Curren Katz' \n", "2 [2476, 2486): 'Ritu Jyoti' \n", "\n", " title \n", "0 [1306, 1339): 'general manager, Data and AI, IBM' \n", "1 [1851, 1896): 'Director of Data Science R&D, H... \n", "2 [2488, 2581): 'program vice president, AI rese... " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Aggregate the nodes of each title to find the span of the \n", "# entire title.\n", "titles = (\n", " selected_nodes\n", " .groupby(\"person\")\n", " .aggregate({\"span\": \"sum\"})\n", " .reset_index()\n", " .rename(columns={\"span\": \"title\"})\n", ")\n", "titles" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we have found a job title for each of the executive names in this document!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tying it all together" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's put all of the code we've presented so far into a single function." ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Keep the contents of this cell synchronized with the gist at\n", "# https://gist.github.com/frreiss/a731438dda4ac948beca85d3fe167ff3\n", "import pandas as pd\n", "import text_extensions_for_pandas as tp\n", "\n", "def find_titles_of_persons(persons: pd.DataFrame,\n", " spacy_language_model) -> pd.DataFrame:\n", " \"\"\"\n", " :param persons: DataFrame containing information about person names.\n", " :param spacy_language_model: Loaded SpaCy language model with dependency \n", " parsing support.\n", " \n", " :returns: A DataFrame with a row for every title identified and two columns,\n", " \"person\" and \"title\".\n", " \"\"\"\n", " def traverse_edges_once(start_nodes: pd.DataFrame, edges: pd.DataFrame,\n", " metadata_cols = [\"person\"]) -> pd.DataFrame:\n", " return (\n", " start_nodes[[\"person\", \"id\"]] # Propagate original \"person\" span\n", " .merge(edges, left_on=\"id\", right_on=\"head\", \n", " suffixes=[\"_head\", \"\"])[[\"person\", \"id\"]]\n", " .merge(nodes)\n", " )\n", " \n", " if len(persons.index) == 0:\n", " # Special case: Empty input --> empty output\n", " return pd.DataFrame({\n", " \"person\": pd.Series([], dtype=tp.SpanDtype()),\n", " \"title\": pd.Series([], dtype=tp.SpanDtype()),\n", " })\n", " \n", "\n", " # Retrieve the document text from the person spans.\n", " doc_text = persons[\"person\"].array.document_text\n", " \n", " # Run dependency parsing on the text and convert the parse to a DataFrame.\n", " all_token_features = tp.io.spacy.make_tokens_and_features(doc_text, spacy_language_model)\n", "\n", " # Drop the columns we won't need for this analysis.\n", " tokens = all_token_features[[\"id\", \"span\", \"tag\", \"dep\", \"head\", \"sentence\"]]\n", " \n", " # Split the parse tree into nodes and edges and filter the edges.\n", " nodes = tokens[[\"id\", \"span\", \"tag\"]].reset_index(drop=True)\n", " edges = tokens[[\"id\", \"head\", \"dep\"]].reset_index(drop=True)\n", "\n", " # Start with the nodes that are inside person names.\n", " person_nodes = (\n", " tp.spanner.overlap_join(persons[\"person\"], nodes[\"span\"],\n", " \"person\", \"span\")\n", " .merge(nodes)\n", " )\n", " \n", " # Step 1: Follow `appos` edges from the person names\n", " appos_targets = traverse_edges_once(person_nodes, \n", " edges[edges[\"dep\"] == \"appos\"])\n", " \n", " # Step 2: Transitive closure to find all tokens in the titles\n", " selected_nodes = appos_targets.copy()\n", " previous_num_nodes = 0\n", " while len(selected_nodes.index) > previous_num_nodes:\n", "\n", " # Find all the nodes that are directly reachable from our selected set.\n", " addl_nodes = traverse_edges_once(selected_nodes, edges)\n", "\n", " # Merge the new nodes into the selected set\n", " previous_num_nodes = len(selected_nodes.index)\n", " selected_nodes = (pd.concat([selected_nodes, addl_nodes])\n", " .drop_duplicates())\n", "\n", " # Aggregate the nodes of each title to find the span of the entire title.\n", " titles = (\n", " selected_nodes\n", " .groupby(\"person\")\n", " .aggregate({\"span\": \"sum\"})\n", " .reset_index()\n", " .rename(columns={\"span\": \"title\"})\n", " )\n", "\n", " # As of Pandas 1.2.1, groupby() over extension types downgrades them to object \n", " # dtype. Cast back up to the extension type.\n", " titles[\"person\"] = titles[\"person\"].astype(tp.SpanDtype())\n", " \n", " return titles\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If we combine this `find_titles_of_persons()` function with the `find_persons_quoted_by_name()` function we created in our previous post, we can build a data mining pipeline. This pipeline finds the names and titles of executives in corporate press releases. Here's the output that we get if we pass a year's worth of IBM press releases through this pipeline:" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# Don't include this cell in the blog post.\n", "\n", "# Load press release URLs from a file\n", "with open(\"ibm_press_releases.txt\", \"r\") as f:\n", " lines = [l.strip() for l in f.readlines()]\n", " ibm_press_release_urls = [l for l in lines if len(l) > 0 and l[0] != \"#\"]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>person</th>\n", " <th>title</th>\n", " <th>url</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>[1977, 1991): 'Wendi Whitmore'</td>\n", " <td>[1993, 2040): 'Vice President, IBM X-Force Thr...</td>\n", " <td>https://newsroom.ibm.com/2020-02-11-IBM-X-Forc...</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>[1281, 1292): 'Rob DiCicco'</td>\n", " <td>[1294, 1348): 'PharmD, Deputy Chief Health Off...</td>\n", " <td>https://newsroom.ibm.com/2020-02-18-IBM-Study-...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>[1213, 1229): 'Christoph Herman'</td>\n", " <td>[1231, 1281): 'SVP and Head of SAP HANA Enterp...</td>\n", " <td>https://newsroom.ibm.com/2020-02-19-IBM-Power-...</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>[2227, 2242): 'Stephen Leonard'</td>\n", " <td>[2244, 2282): 'General Manager, IBM Cognitive ...</td>\n", " <td>https://newsroom.ibm.com/2020-02-19-IBM-Power-...</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>[2289, 2297): 'Bob Lord'</td>\n", " <td>[2299, 2375): 'IBM Senior Vice President of Co...</td>\n", " <td>https://newsroom.ibm.com/2020-02-26-2020-Call-...</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>254</th>\n", " <td>[3114, 3124): 'Mike Doran'</td>\n", " <td>[3126, 3157): 'Worldwide Sales Director at IBM'</td>\n", " <td>https://newsroom.ibm.com/2021-01-25-OVHcloud-t...</td>\n", " </tr>\n", " <tr>\n", " <th>255</th>\n", " <td>[3155, 3169): 'Howard Boville'</td>\n", " <td>[3171, 3210): 'Senior Vice President, IBM Hybr...</td>\n", " <td>https://newsroom.ibm.com/2021-01-26-Luminor-Ba...</td>\n", " </tr>\n", " <tr>\n", " <th>256</th>\n", " <td>[3114, 3126): 'Samuel Brack'</td>\n", " <td>[3127, 3152): 'Co-Founder and CTO at DIA'</td>\n", " <td>https://newsroom.ibm.com/2021-01-26-DIA-Levera...</td>\n", " </tr>\n", " <tr>\n", " <th>257</th>\n", " <td>[3509, 3523): 'Hillery Hunter'</td>\n", " <td>[3525, 3556): 'IBM Fellow, VP & CTO, IBM Cloud'</td>\n", " <td>https://newsroom.ibm.com/2021-01-26-DIA-Levera...</td>\n", " </tr>\n", " <tr>\n", " <th>258</th>\n", " <td>[1487, 1497): 'Ana Zamper'</td>\n", " <td>[1499, 1534): 'Ecosystem Leader, IBM Latin Ame...</td>\n", " <td>https://newsroom.ibm.com/2021-01-26-Latin-Amer...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>259 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " person \\\n", "0 [1977, 1991): 'Wendi Whitmore' \n", "1 [1281, 1292): 'Rob DiCicco' \n", "2 [1213, 1229): 'Christoph Herman' \n", "3 [2227, 2242): 'Stephen Leonard' \n", "4 [2289, 2297): 'Bob Lord' \n", ".. ... \n", "254 [3114, 3124): 'Mike Doran' \n", "255 [3155, 3169): 'Howard Boville' \n", "256 [3114, 3126): 'Samuel Brack' \n", "257 [3509, 3523): 'Hillery Hunter' \n", "258 [1487, 1497): 'Ana Zamper' \n", "\n", " title \\\n", "0 [1993, 2040): 'Vice President, IBM X-Force Thr... \n", "1 [1294, 1348): 'PharmD, Deputy Chief Health Off... \n", "2 [1231, 1281): 'SVP and Head of SAP HANA Enterp... \n", "3 [2244, 2282): 'General Manager, IBM Cognitive ... \n", "4 [2299, 2375): 'IBM Senior Vice President of Co... \n", ".. ... \n", "254 [3126, 3157): 'Worldwide Sales Director at IBM' \n", "255 [3171, 3210): 'Senior Vice President, IBM Hybr... \n", "256 [3127, 3152): 'Co-Founder and CTO at DIA' \n", "257 [3525, 3556): 'IBM Fellow, VP & CTO, IBM Cloud' \n", "258 [1499, 1534): 'Ecosystem Leader, IBM Latin Ame... \n", "\n", " url \n", "0 https://newsroom.ibm.com/2020-02-11-IBM-X-Forc... \n", "1 https://newsroom.ibm.com/2020-02-18-IBM-Study-... \n", "2 https://newsroom.ibm.com/2020-02-19-IBM-Power-... \n", "3 https://newsroom.ibm.com/2020-02-19-IBM-Power-... \n", "4 https://newsroom.ibm.com/2020-02-26-2020-Call-... \n", ".. ... \n", "254 https://newsroom.ibm.com/2021-01-25-OVHcloud-t... \n", "255 https://newsroom.ibm.com/2021-01-26-Luminor-Ba... \n", "256 https://newsroom.ibm.com/2021-01-26-DIA-Levera... \n", "257 https://newsroom.ibm.com/2021-01-26-DIA-Levera... \n", "258 https://newsroom.ibm.com/2021-01-26-Latin-Amer... \n", "\n", "[259 rows x 3 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "to_concat = []\n", "for url in ibm_press_release_urls:\n", " persons = find_persons_quoted_by_name(url, api_key, \n", " service_url)\n", " titles = find_titles_of_persons(persons, \n", " spacy_language_model)\n", " titles[\"url\"] = url\n", " to_concat.append(titles)\n", " \n", "all_titles = pd.concat(to_concat).reset_index(drop=True)\n", "all_titles" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Our pipeline has processed 191 press releases, and it found the names and titles of 259 executives!\n", "\n", "To find out more about the extensions to Pandas that made this possible, check out Text Extensions for Pandas [here](https://ibm.biz/text-extensions-for-pandas).\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>person</th>\n", " <th>title</th>\n", " <th>url</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>209</th>\n", " <td>[1449, 1466): 'Justin Youngblood'</td>\n", " <td>[1468, 1495): 'Vice President IBM Security'</td>\n", " <td>https://newsroom.ibm.com/2020-12-01-IBM-Named-...</td>\n", " </tr>\n", " <tr>\n", " <th>210</th>\n", " <td>[1133, 1149): 'Daniel Hernandez'</td>\n", " <td>[1151, 1184): 'General Manager, Data and AI, IBM'</td>\n", " <td>https://newsroom.ibm.com/2020-12-02-IBM-Positi...</td>\n", " </tr>\n", " <tr>\n", " <th>211</th>\n", " <td>[2035, 2048): 'Vitaly Tsivin'</td>\n", " <td>[2050, 2086): 'Executive Vice President of Bus...</td>\n", " <td>https://newsroom.ibm.com/2020-12-02-IBM-Positi...</td>\n", " </tr>\n", " <tr>\n", " <th>212</th>\n", " <td>[1288, 1304): 'Daniel Hernandez'</td>\n", " <td>[1306, 1339): 'general manager, Data and AI, IBM'</td>\n", " <td>https://newsroom.ibm.com/2020-12-02-IBM-Named-...</td>\n", " </tr>\n", " <tr>\n", " <th>213</th>\n", " <td>[1838, 1849): 'Curren Katz'</td>\n", " <td>[1851, 1896): 'Director of Data Science R&D, H...</td>\n", " <td>https://newsroom.ibm.com/2020-12-02-IBM-Named-...</td>\n", " </tr>\n", " <tr>\n", " <th>214</th>\n", " <td>[2476, 2486): 'Ritu Jyoti'</td>\n", " <td>[2488, 2581): 'program vice president, AI rese...</td>\n", " <td>https://newsroom.ibm.com/2020-12-02-IBM-Named-...</td>\n", " </tr>\n", " <tr>\n", " <th>215</th>\n", " <td>[813, 825): 'Daniel Stumm'</td>\n", " <td>[827, 861): 'ABB's Head of Indirect Procurement'</td>\n", " <td>https://newsroom.ibm.com/2020-12-03-IBM-Helps-...</td>\n", " </tr>\n", " <tr>\n", " <th>216</th>\n", " <td>[2802, 2816): 'Neil McCormack'</td>\n", " <td>[2818, 2866): 'managing partner - Geo Leader, ...</td>\n", " <td>https://newsroom.ibm.com/2020-12-03-IBM-Helps-...</td>\n", " </tr>\n", " <tr>\n", " <th>217</th>\n", " <td>[3453, 3465): 'Luigi Menzio'</td>\n", " <td>[3467, 3505): 'Services Executive Partner, IBM...</td>\n", " <td>https://newsroom.ibm.com/2020-12-03-Piaggio-Gr...</td>\n", " </tr>\n", " <tr>\n", " <th>218</th>\n", " <td>[2164, 2180): 'Daniel Hernandez'</td>\n", " <td>[2182, 2217): 'General Manager of Data and AI,...</td>\n", " <td>https://newsroom.ibm.com/2020-12-09-IBM-Launch...</td>\n", " </tr>\n", " <tr>\n", " <th>219</th>\n", " <td>[2933, 2945): 'André Tamers'</td>\n", " <td>[2947, 2976): 'owner of De Maison Selections'</td>\n", " <td>https://newsroom.ibm.com/2020-12-10-eProvenanc...</td>\n", " </tr>\n", " <tr>\n", " <th>220</th>\n", " <td>[3508, 3526): 'Robin Grumman-Vogt'</td>\n", " <td>[3528, 3546): 'CEO of eProvenance'</td>\n", " <td>https://newsroom.ibm.com/2020-12-10-eProvenanc...</td>\n", " </tr>\n", " <tr>\n", " <th>221</th>\n", " <td>[1219, 1231): 'Aarti Borkar'</td>\n", " <td>[1233, 1261): 'Vice President, IBM Security'</td>\n", " <td>https://newsroom.ibm.com/2020-12-10-IBM-Collab...</td>\n", " </tr>\n", " <tr>\n", " <th>222</th>\n", " <td>[1410, 1422): 'Nick Kolesch'</td>\n", " <td>[1424, 1482): 'Vice President for Projects, Al...</td>\n", " <td>https://newsroom.ibm.com/2020-12-15-IBM-and-Th...</td>\n", " </tr>\n", " <tr>\n", " <th>223</th>\n", " <td>[1999, 2012): 'Manish Chawla'</td>\n", " <td>[2013, 2092): 'Global Industry Managing Direct...</td>\n", " <td>https://newsroom.ibm.com/2020-12-15-IBM-and-Th...</td>\n", " </tr>\n", " <tr>\n", " <th>224</th>\n", " <td>[793, 807): 'Michael Jacobs'</td>\n", " <td>[809, 845): 'IBM Offering Manager, Sustainabil...</td>\n", " <td>https://newsroom.ibm.com/2020-12-15-IBM-Launch...</td>\n", " </tr>\n", " <tr>\n", " <th>225</th>\n", " <td>[2738, 2753): 'Matt Larsen-Daw'</td>\n", " <td>[2755, 2800): 'Education Manager, World Wide F...</td>\n", " <td>https://newsroom.ibm.com/2020-12-15-IBM-Launch...</td>\n", " </tr>\n", " <tr>\n", " <th>226</th>\n", " <td>[3452, 3464): 'Tom Ackerman'</td>\n", " <td>[3466, 3521): 'Vice President for Education, C...</td>\n", " <td>https://newsroom.ibm.com/2020-12-15-IBM-Launch...</td>\n", " </tr>\n", " <tr>\n", " <th>227</th>\n", " <td>[441, 452): 'Mark Foster'</td>\n", " <td>[454, 489): 'Senior Vice President, IBM Services'</td>\n", " <td>https://newsroom.ibm.com/2020-12-15-IBM-Acquir...</td>\n", " </tr>\n", " <tr>\n", " <th>228</th>\n", " <td>[3126, 3141): 'Jacques Leblanc'</td>\n", " <td>[3143, 3170): 'CEO and founder of Expertus'</td>\n", " <td>https://newsroom.ibm.com/2020-12-15-IBM-Acquir...</td>\n", " </tr>\n", " <tr>\n", " <th>229</th>\n", " <td>[1396, 1411): 'Sridhar Muppidi'</td>\n", " <td>[1413, 1451): 'Chief Technology Officer, IBM S...</td>\n", " <td>https://newsroom.ibm.com/2020-12-17-IBM-Helps-...</td>\n", " </tr>\n", " <tr>\n", " <th>230</th>\n", " <td>[1323, 1332): 'Paul Roma'</td>\n", " <td>[1334, 1368): 'General Manager, IBM Watson Hea...</td>\n", " <td>https://newsroom.ibm.com/2020-12-18-IBM-and-Sa...</td>\n", " </tr>\n", " <tr>\n", " <th>231</th>\n", " <td>[1790, 1804): 'Bill Patterson'</td>\n", " <td>[1806, 1848): 'EVP and GM, CRM Applications at...</td>\n", " <td>https://newsroom.ibm.com/2020-12-18-IBM-and-Sa...</td>\n", " </tr>\n", " <tr>\n", " <th>232</th>\n", " <td>[1119, 1131): 'John Granger'</td>\n", " <td>[1133, 1242): 'Senior Vice President, Cloud Ap...</td>\n", " <td>https://newsroom.ibm.com/2020-12-21-IBM-to-Acq...</td>\n", " </tr>\n", " <tr>\n", " <th>233</th>\n", " <td>[1738, 1754): 'Fernando Herrera'</td>\n", " <td>[1756, 1787): 'Chairman and Founder, Nordcloud'</td>\n", " <td>https://newsroom.ibm.com/2020-12-21-IBM-to-Acq...</td>\n", " </tr>\n", " <tr>\n", " <th>234</th>\n", " <td>[1543, 1557): 'Jay Bellissimo'</td>\n", " <td>[1559, 1612): 'IBM's General Manager, U.S. Pub...</td>\n", " <td>https://newsroom.ibm.com/2020-12-21-IBM-Select...</td>\n", " </tr>\n", " <tr>\n", " <th>235</th>\n", " <td>[1022, 1036): 'Jay Bellissimo'</td>\n", " <td>[1038, 1098): 'IBM's General Manager, U.S. Pub...</td>\n", " <td>https://newsroom.ibm.com/2020-12-22-USDA-Taps-...</td>\n", " </tr>\n", " <tr>\n", " <th>236</th>\n", " <td>[1775, 1794): 'Archana Vemulapalli'</td>\n", " <td>[1796, 1860): 'General Manager, IBM Infrastruc...</td>\n", " <td>https://newsroom.ibm.com/2021-01-04-IBM-Study-...</td>\n", " </tr>\n", " <tr>\n", " <th>237</th>\n", " <td>[864, 885): 'Dr. Corey S. Bradford'</td>\n", " <td>[887, 934): 'Sr., president of Harris-Stowe St...</td>\n", " <td>https://newsroom.ibm.com/2021-01-05-IBM-Provid...</td>\n", " </tr>\n", " <tr>\n", " <th>238</th>\n", " <td>[1681, 1704): 'Valinda Scarbro Kennedy'</td>\n", " <td>[1706, 1755): 'HBCU Program Lead, IBM Global U...</td>\n", " <td>https://newsroom.ibm.com/2021-01-05-IBM-Provid...</td>\n", " </tr>\n", " <tr>\n", " <th>239</th>\n", " <td>[2157, 2172): 'Bashir Bseirani'</td>\n", " <td>[2174, 2188): 'CEO at Avertra'</td>\n", " <td>https://newsroom.ibm.com/2021-01-06-IBM-and-Av...</td>\n", " </tr>\n", " <tr>\n", " <th>240</th>\n", " <td>[1574, 1594): 'Oran Vongsuraphichet'</td>\n", " <td>[1596, 1662): 'Chief Executive Officer of Thai...</td>\n", " <td>https://newsroom.ibm.com/2021-01-06-Thai-Re-la...</td>\n", " </tr>\n", " <tr>\n", " <th>241</th>\n", " <td>[2153, 2170): 'Patama Chantaruck'</td>\n", " <td>[2172, 2221): 'VP for Indochina Expansion and ...</td>\n", " <td>https://newsroom.ibm.com/2021-01-06-Thai-Re-la...</td>\n", " </tr>\n", " <tr>\n", " <th>242</th>\n", " <td>[952, 966): 'Arvind Krishna'</td>\n", " <td>[968, 991): 'Chairman and CEO of IBM'</td>\n", " <td>https://newsroom.ibm.com/2021-01-07-IBM-Appoin...</td>\n", " </tr>\n", " <tr>\n", " <th>243</th>\n", " <td>[596, 607): 'Mark Foster'</td>\n", " <td>[609, 644): 'Senior Vice President, IBM Services'</td>\n", " <td>https://newsroom.ibm.com/2021-01-11-IBM-Acquir...</td>\n", " </tr>\n", " <tr>\n", " <th>244</th>\n", " <td>[1912, 1924): 'Tyler Prince'</td>\n", " <td>[1926, 1996): 'Executive Vice President, World...</td>\n", " <td>https://newsroom.ibm.com/2021-01-11-IBM-Acquir...</td>\n", " </tr>\n", " <tr>\n", " <th>245</th>\n", " <td>[3546, 3560): 'Paul Stillmank'</td>\n", " <td>[3562, 3568): 'CEO of'</td>\n", " <td>https://newsroom.ibm.com/2021-01-11-IBM-Acquir...</td>\n", " </tr>\n", " <tr>\n", " <th>246</th>\n", " <td>[647, 656): 'Darío Gil'</td>\n", " <td>[658, 708): 'Senior Vice President and Directo...</td>\n", " <td>https://newsroom.ibm.com/2021-01-12-IBM-Tops-U...</td>\n", " </tr>\n", " <tr>\n", " <th>247</th>\n", " <td>[3178, 3193): 'Alistair Rennie'</td>\n", " <td>[3195, 3228): 'General Manager of IBM Blockchain'</td>\n", " <td>https://newsroom.ibm.com/2021-01-13-Covalent-T...</td>\n", " </tr>\n", " <tr>\n", " <th>248</th>\n", " <td>[497, 509): 'John Granger'</td>\n", " <td>[511, 620): 'Senior Vice President, Cloud Appl...</td>\n", " <td>https://newsroom.ibm.com/2021-01-14-IBM-Boosts...</td>\n", " </tr>\n", " <tr>\n", " <th>249</th>\n", " <td>[2375, 2386): 'Hamilton Yu'</td>\n", " <td>[2388, 2399): 'CEO of Taos'</td>\n", " <td>https://newsroom.ibm.com/2021-01-14-IBM-Boosts...</td>\n", " </tr>\n", " <tr>\n", " <th>250</th>\n", " <td>[2164, 2180): 'Nourdine Bihmane'</td>\n", " <td>[2182, 2255): 'Head of Decarbonization Busines...</td>\n", " <td>https://newsroom.ibm.com/2021-01-19-Atos-and-I...</td>\n", " </tr>\n", " <tr>\n", " <th>251</th>\n", " <td>[2723, 2731): 'Bob Lord'</td>\n", " <td>[2733, 2779): 'SVP Cognitive Applications and ...</td>\n", " <td>https://newsroom.ibm.com/2021-01-19-Atos-and-I...</td>\n", " </tr>\n", " <tr>\n", " <th>252</th>\n", " <td>[315, 329): 'Arvind Krishna'</td>\n", " <td>[331, 371): 'IBM chairman and chief executive ...</td>\n", " <td>https://newsroom.ibm.com/2021-01-21-IBM-Report...</td>\n", " </tr>\n", " <tr>\n", " <th>253</th>\n", " <td>[2116, 2131): 'James Kavanaugh'</td>\n", " <td>[2133, 2186): 'IBM senior vice president and c...</td>\n", " <td>https://newsroom.ibm.com/2021-01-21-IBM-Report...</td>\n", " </tr>\n", " <tr>\n", " <th>254</th>\n", " <td>[3114, 3124): 'Mike Doran'</td>\n", " <td>[3126, 3157): 'Worldwide Sales Director at IBM'</td>\n", " <td>https://newsroom.ibm.com/2021-01-25-OVHcloud-t...</td>\n", " </tr>\n", " <tr>\n", " <th>255</th>\n", " <td>[3155, 3169): 'Howard Boville'</td>\n", " <td>[3171, 3210): 'Senior Vice President, IBM Hybr...</td>\n", " <td>https://newsroom.ibm.com/2021-01-26-Luminor-Ba...</td>\n", " </tr>\n", " <tr>\n", " <th>256</th>\n", " <td>[3114, 3126): 'Samuel Brack'</td>\n", " <td>[3127, 3152): 'Co-Founder and CTO at DIA'</td>\n", " <td>https://newsroom.ibm.com/2021-01-26-DIA-Levera...</td>\n", " </tr>\n", " <tr>\n", " <th>257</th>\n", " <td>[3509, 3523): 'Hillery Hunter'</td>\n", " <td>[3525, 3556): 'IBM Fellow, VP & CTO, IBM Cloud'</td>\n", " <td>https://newsroom.ibm.com/2021-01-26-DIA-Levera...</td>\n", " </tr>\n", " <tr>\n", " <th>258</th>\n", " <td>[1487, 1497): 'Ana Zamper'</td>\n", " <td>[1499, 1534): 'Ecosystem Leader, IBM Latin Ame...</td>\n", " <td>https://newsroom.ibm.com/2021-01-26-Latin-Amer...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " person \\\n", "209 [1449, 1466): 'Justin Youngblood' \n", "210 [1133, 1149): 'Daniel Hernandez' \n", "211 [2035, 2048): 'Vitaly Tsivin' \n", "212 [1288, 1304): 'Daniel Hernandez' \n", "213 [1838, 1849): 'Curren Katz' \n", "214 [2476, 2486): 'Ritu Jyoti' \n", "215 [813, 825): 'Daniel Stumm' \n", "216 [2802, 2816): 'Neil McCormack' \n", "217 [3453, 3465): 'Luigi Menzio' \n", "218 [2164, 2180): 'Daniel Hernandez' \n", "219 [2933, 2945): 'André Tamers' \n", "220 [3508, 3526): 'Robin Grumman-Vogt' \n", "221 [1219, 1231): 'Aarti Borkar' \n", "222 [1410, 1422): 'Nick Kolesch' \n", "223 [1999, 2012): 'Manish Chawla' \n", "224 [793, 807): 'Michael Jacobs' \n", "225 [2738, 2753): 'Matt Larsen-Daw' \n", "226 [3452, 3464): 'Tom Ackerman' \n", "227 [441, 452): 'Mark Foster' \n", "228 [3126, 3141): 'Jacques Leblanc' \n", "229 [1396, 1411): 'Sridhar Muppidi' \n", "230 [1323, 1332): 'Paul Roma' \n", "231 [1790, 1804): 'Bill Patterson' \n", "232 [1119, 1131): 'John Granger' \n", "233 [1738, 1754): 'Fernando Herrera' \n", "234 [1543, 1557): 'Jay Bellissimo' \n", "235 [1022, 1036): 'Jay Bellissimo' \n", "236 [1775, 1794): 'Archana Vemulapalli' \n", "237 [864, 885): 'Dr. Corey S. Bradford' \n", "238 [1681, 1704): 'Valinda Scarbro Kennedy' \n", "239 [2157, 2172): 'Bashir Bseirani' \n", "240 [1574, 1594): 'Oran Vongsuraphichet' \n", "241 [2153, 2170): 'Patama Chantaruck' \n", "242 [952, 966): 'Arvind Krishna' \n", "243 [596, 607): 'Mark Foster' \n", "244 [1912, 1924): 'Tyler Prince' \n", "245 [3546, 3560): 'Paul Stillmank' \n", "246 [647, 656): 'Darío Gil' \n", "247 [3178, 3193): 'Alistair Rennie' \n", "248 [497, 509): 'John Granger' \n", "249 [2375, 2386): 'Hamilton Yu' \n", "250 [2164, 2180): 'Nourdine Bihmane' \n", "251 [2723, 2731): 'Bob Lord' \n", "252 [315, 329): 'Arvind Krishna' \n", "253 [2116, 2131): 'James Kavanaugh' \n", "254 [3114, 3124): 'Mike Doran' \n", "255 [3155, 3169): 'Howard Boville' \n", "256 [3114, 3126): 'Samuel Brack' \n", "257 [3509, 3523): 'Hillery Hunter' \n", "258 [1487, 1497): 'Ana Zamper' \n", "\n", " title \\\n", "209 [1468, 1495): 'Vice President IBM Security' \n", "210 [1151, 1184): 'General Manager, Data and AI, IBM' \n", "211 [2050, 2086): 'Executive Vice President of Bus... \n", "212 [1306, 1339): 'general manager, Data and AI, IBM' \n", "213 [1851, 1896): 'Director of Data Science R&D, H... \n", "214 [2488, 2581): 'program vice president, AI rese... \n", "215 [827, 861): 'ABB's Head of Indirect Procurement' \n", "216 [2818, 2866): 'managing partner - Geo Leader, ... \n", "217 [3467, 3505): 'Services Executive Partner, IBM... \n", "218 [2182, 2217): 'General Manager of Data and AI,... \n", "219 [2947, 2976): 'owner of De Maison Selections' \n", "220 [3528, 3546): 'CEO of eProvenance' \n", "221 [1233, 1261): 'Vice President, IBM Security' \n", "222 [1424, 1482): 'Vice President for Projects, Al... \n", "223 [2013, 2092): 'Global Industry Managing Direct... \n", "224 [809, 845): 'IBM Offering Manager, Sustainabil... \n", "225 [2755, 2800): 'Education Manager, World Wide F... \n", "226 [3466, 3521): 'Vice President for Education, C... \n", "227 [454, 489): 'Senior Vice President, IBM Services' \n", "228 [3143, 3170): 'CEO and founder of Expertus' \n", "229 [1413, 1451): 'Chief Technology Officer, IBM S... \n", "230 [1334, 1368): 'General Manager, IBM Watson Hea... \n", "231 [1806, 1848): 'EVP and GM, CRM Applications at... \n", "232 [1133, 1242): 'Senior Vice President, Cloud Ap... \n", "233 [1756, 1787): 'Chairman and Founder, Nordcloud' \n", "234 [1559, 1612): 'IBM's General Manager, U.S. Pub... \n", "235 [1038, 1098): 'IBM's General Manager, U.S. Pub... \n", "236 [1796, 1860): 'General Manager, IBM Infrastruc... \n", "237 [887, 934): 'Sr., president of Harris-Stowe St... \n", "238 [1706, 1755): 'HBCU Program Lead, IBM Global U... \n", "239 [2174, 2188): 'CEO at Avertra' \n", "240 [1596, 1662): 'Chief Executive Officer of Thai... \n", "241 [2172, 2221): 'VP for Indochina Expansion and ... \n", "242 [968, 991): 'Chairman and CEO of IBM' \n", "243 [609, 644): 'Senior Vice President, IBM Services' \n", "244 [1926, 1996): 'Executive Vice President, World... \n", "245 [3562, 3568): 'CEO of' \n", "246 [658, 708): 'Senior Vice President and Directo... \n", "247 [3195, 3228): 'General Manager of IBM Blockchain' \n", "248 [511, 620): 'Senior Vice President, Cloud Appl... \n", "249 [2388, 2399): 'CEO of Taos' \n", "250 [2182, 2255): 'Head of Decarbonization Busines... \n", "251 [2733, 2779): 'SVP Cognitive Applications and ... \n", "252 [331, 371): 'IBM chairman and chief executive ... \n", "253 [2133, 2186): 'IBM senior vice president and c... \n", "254 [3126, 3157): 'Worldwide Sales Director at IBM' \n", "255 [3171, 3210): 'Senior Vice President, IBM Hybr... \n", "256 [3127, 3152): 'Co-Founder and CTO at DIA' \n", "257 [3525, 3556): 'IBM Fellow, VP & CTO, IBM Cloud' \n", "258 [1499, 1534): 'Ecosystem Leader, IBM Latin Ame... \n", "\n", " url \n", "209 https://newsroom.ibm.com/2020-12-01-IBM-Named-... \n", "210 https://newsroom.ibm.com/2020-12-02-IBM-Positi... \n", "211 https://newsroom.ibm.com/2020-12-02-IBM-Positi... \n", "212 https://newsroom.ibm.com/2020-12-02-IBM-Named-... \n", "213 https://newsroom.ibm.com/2020-12-02-IBM-Named-... \n", "214 https://newsroom.ibm.com/2020-12-02-IBM-Named-... \n", "215 https://newsroom.ibm.com/2020-12-03-IBM-Helps-... \n", "216 https://newsroom.ibm.com/2020-12-03-IBM-Helps-... \n", "217 https://newsroom.ibm.com/2020-12-03-Piaggio-Gr... \n", "218 https://newsroom.ibm.com/2020-12-09-IBM-Launch... \n", "219 https://newsroom.ibm.com/2020-12-10-eProvenanc... \n", "220 https://newsroom.ibm.com/2020-12-10-eProvenanc... \n", "221 https://newsroom.ibm.com/2020-12-10-IBM-Collab... \n", "222 https://newsroom.ibm.com/2020-12-15-IBM-and-Th... \n", "223 https://newsroom.ibm.com/2020-12-15-IBM-and-Th... \n", "224 https://newsroom.ibm.com/2020-12-15-IBM-Launch... \n", "225 https://newsroom.ibm.com/2020-12-15-IBM-Launch... \n", "226 https://newsroom.ibm.com/2020-12-15-IBM-Launch... \n", "227 https://newsroom.ibm.com/2020-12-15-IBM-Acquir... \n", "228 https://newsroom.ibm.com/2020-12-15-IBM-Acquir... \n", "229 https://newsroom.ibm.com/2020-12-17-IBM-Helps-... \n", "230 https://newsroom.ibm.com/2020-12-18-IBM-and-Sa... \n", "231 https://newsroom.ibm.com/2020-12-18-IBM-and-Sa... \n", "232 https://newsroom.ibm.com/2020-12-21-IBM-to-Acq... \n", "233 https://newsroom.ibm.com/2020-12-21-IBM-to-Acq... \n", "234 https://newsroom.ibm.com/2020-12-21-IBM-Select... \n", "235 https://newsroom.ibm.com/2020-12-22-USDA-Taps-... \n", "236 https://newsroom.ibm.com/2021-01-04-IBM-Study-... \n", "237 https://newsroom.ibm.com/2021-01-05-IBM-Provid... \n", "238 https://newsroom.ibm.com/2021-01-05-IBM-Provid... \n", "239 https://newsroom.ibm.com/2021-01-06-IBM-and-Av... \n", "240 https://newsroom.ibm.com/2021-01-06-Thai-Re-la... \n", "241 https://newsroom.ibm.com/2021-01-06-Thai-Re-la... \n", "242 https://newsroom.ibm.com/2021-01-07-IBM-Appoin... \n", "243 https://newsroom.ibm.com/2021-01-11-IBM-Acquir... \n", "244 https://newsroom.ibm.com/2021-01-11-IBM-Acquir... \n", "245 https://newsroom.ibm.com/2021-01-11-IBM-Acquir... \n", "246 https://newsroom.ibm.com/2021-01-12-IBM-Tops-U... \n", "247 https://newsroom.ibm.com/2021-01-13-Covalent-T... \n", "248 https://newsroom.ibm.com/2021-01-14-IBM-Boosts... \n", "249 https://newsroom.ibm.com/2021-01-14-IBM-Boosts... \n", "250 https://newsroom.ibm.com/2021-01-19-Atos-and-I... \n", "251 https://newsroom.ibm.com/2021-01-19-Atos-and-I... \n", "252 https://newsroom.ibm.com/2021-01-21-IBM-Report... \n", "253 https://newsroom.ibm.com/2021-01-21-IBM-Report... \n", "254 https://newsroom.ibm.com/2021-01-25-OVHcloud-t... \n", "255 https://newsroom.ibm.com/2021-01-26-Luminor-Ba... \n", "256 https://newsroom.ibm.com/2021-01-26-DIA-Levera... \n", "257 https://newsroom.ibm.com/2021-01-26-DIA-Levera... \n", "258 https://newsroom.ibm.com/2021-01-26-Latin-Amer... " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Don't include this cell in the blog.\n", "\n", "# Check the last 50 rows\n", "all_titles[-50:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }