{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Minimal CoreNLP Scala example\n",
    "\n",
    "You'll need to get CoreNLP jars, for example by loading it as a dependency in some Maven project.\n",
    "\n",
    "We have to use such method because CoreNLP-models has classifier 'models' and currently jupyter-scala doesn't allow classifiers in dependency loading syntax."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ammonite.ops.Path\n",
    "import $ivy.`org.slf4j:slf4j-api:1.7.6`\n",
    "import $ivy.`com.google.protobuf:protobuf-java:3.0.0`\n",
    "import $ivy.`joda-time:joda-time:2.9.4`\n",
    "import $ivy.`de.jollyday:jollyday:0.5.1`\n",
    "\n",
    "val coreNLPVersion = \"3.8.0\"\n",
    "val myHome = \"\"\n",
    "val pathPrefix = s\"$myHome/.m2/repository\"\n",
    "val stanfordPrefix = s\"${pathPrefix}/edu/stanford/nlp/stanford-corenlp/$coreNLPVersion\"\n",
    "\n",
    "interp.load.cp(\n",
    "  Seq(\n",
    "    Path(s\"${stanfordPrefix}/stanford-corenlp-$coreNLPVersion.jar\"),\n",
    "    Path(s\"${stanfordPrefix}/stanford-corenlp-$coreNLPVersion-models.jar\")\n",
    "  )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36medu.stanford.nlp.simple._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36mscala.collection.JavaConverters._\u001b[39m"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import edu.stanford.nlp.simple._\n",
    "import scala.collection.JavaConverters._"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[36msentenceText\u001b[39m: \u001b[32mString\u001b[39m = \u001b[32m\"Chomsky's colorless green ideas sleep furiously.\"\u001b[39m\n",
       "\u001b[36msentence\u001b[39m: \u001b[32mSentence\u001b[39m = Chomsky's colorless green ideas sleep furiously."
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val sentenceText = \"Chomsky's colorless green ideas sleep furiously.\"\n",
    "val sentence = new Sentence(sentenceText)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[36mres4\u001b[39m: \u001b[32mjava\u001b[39m.\u001b[32mutil\u001b[39m.\u001b[32mList\u001b[39m[\u001b[32mString\u001b[39m] = [PERSON, O, O, O, O, O, O, O]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.nerTags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[36mres5\u001b[39m: \u001b[32mjava\u001b[39m.\u001b[32mutil\u001b[39m.\u001b[32mList\u001b[39m[\u001b[32mString\u001b[39m] = [NNP, POS, JJ, JJ, NNS, VBP, RB, .]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sentence.posTags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ROOT\n",
      "  S\n",
      "    NP\n",
      "      NP\n",
      "        NNP\n",
      "          Chomsky\n",
      "        POS\n",
      "          's\n",
      "      JJ\n",
      "        colorless\n",
      "      JJ\n",
      "        green\n",
      "      NNS\n",
      "        ideas\n",
      "    VP\n",
      "      VBP\n",
      "        sleep\n",
      "      ADVP\n",
      "        RB\n",
      "          furiously\n",
      "    .\n",
      "      .\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "\u001b[36mparseTree\u001b[39m: \u001b[32medu\u001b[39m.\u001b[32mstanford\u001b[39m.\u001b[32mnlp\u001b[39m.\u001b[32mtrees\u001b[39m.\u001b[32mTree\u001b[39m = (ROOT (S (NP (NP (NNP Chomsky) (POS 's)) (JJ colorless) (JJ green) (NNS ideas)) (VP (VBP sleep) (ADVP (RB furiously))) (. .)))"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val parseTree = sentence.parse()\n",
    "parseTree.indentedListPrint"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Garden path sentences\n",
    "\n",
    "We'll try to parse some [garden path sentences](https://en.wikipedia.org/wiki/Garden_path_sentence)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[36mgardenPathSentences\u001b[39m: \u001b[32mList\u001b[39m[\u001b[32mSentence\u001b[39m] = \u001b[33mList\u001b[39m(\n",
       "  The government plans to raise taxes were approved.,\n",
       "  The complex houses married and single soldiers and their families.,\n",
       "  The horse raced past the barn fell.,\n",
       "  The old man the boat.\n",
       ")\n",
       "\u001b[36mres7_1\u001b[39m: \u001b[32mList\u001b[39m[\u001b[32mList\u001b[39m[(\u001b[32mString\u001b[39m, \u001b[32mString\u001b[39m)]] = \u001b[33mList\u001b[39m(\n",
       "  \u001b[33mList\u001b[39m(\n",
       "    (\u001b[32m\"The\"\u001b[39m, \u001b[32m\"DT\"\u001b[39m),\n",
       "    (\u001b[32m\"government\"\u001b[39m, \u001b[32m\"NN\"\u001b[39m),\n",
       "    (\u001b[32m\"plans\"\u001b[39m, \u001b[32m\"VBZ\"\u001b[39m),\n",
       "    (\u001b[32m\"to\"\u001b[39m, \u001b[32m\"TO\"\u001b[39m),\n",
       "    (\u001b[32m\"raise\"\u001b[39m, \u001b[32m\"VB\"\u001b[39m),\n",
       "    (\u001b[32m\"taxes\"\u001b[39m, \u001b[32m\"NNS\"\u001b[39m),\n",
       "    (\u001b[32m\"were\"\u001b[39m, \u001b[32m\"VBD\"\u001b[39m),\n",
       "    (\u001b[32m\"approved\"\u001b[39m, \u001b[32m\"VBN\"\u001b[39m),\n",
       "    (\u001b[32m\".\"\u001b[39m, \u001b[32m\".\"\u001b[39m)\n",
       "  ),\n",
       "\u001b[33m...\u001b[39m"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val gardenPathSentences = List(\n",
    "  \"The government plans to raise taxes were approved.\",\n",
    "  \"The complex houses married and single soldiers and their families.\",\n",
    "  \"The horse raced past the barn fell.\",\n",
    "  \"The old man the boat.\" \n",
    "  ).map(str => new Sentence(str))\n",
    "\n",
    "gardenPathSentences.map {\n",
    "  sent => \n",
    "    sent.words.asScala.toList.zip(\n",
    "      sent.posTags.asScala.toList\n",
    "    )\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sentence:\n",
      "The government plans to raise taxes were approved.\n",
      "\n",
      "Parse tree:\n",
      "ROOT\n",
      "  S\n",
      "    NP\n",
      "      DT\n",
      "        The\n",
      "      NN\n",
      "        government\n",
      "    VP\n",
      "      VBZ\n",
      "        plans\n",
      "      S\n",
      "        VP\n",
      "          TO\n",
      "            to\n",
      "          VP\n",
      "            VB\n",
      "              raise\n",
      "            SBAR\n",
      "              S\n",
      "                NP\n",
      "                  NNS\n",
      "                    taxes\n",
      "                VP\n",
      "                  VBD\n",
      "                    were\n",
      "                  VP\n",
      "                    VBN\n",
      "                      approved\n",
      "    .\n",
      "      .\n",
      "Sentence:\n",
      "The complex houses married and single soldiers and their families.\n",
      "\n",
      "Parse tree:\n",
      "ROOT\n",
      "  NP\n",
      "    NP\n",
      "      DT\n",
      "        The\n",
      "      ADJP\n",
      "        JJ\n",
      "          complex\n",
      "      NNS\n",
      "        houses\n",
      "    NP\n",
      "      NP\n",
      "        VBN\n",
      "          married\n",
      "        CC\n",
      "          and\n",
      "        JJ\n",
      "          single\n",
      "        NNS\n",
      "          soldiers\n",
      "      CC\n",
      "        and\n",
      "      NP\n",
      "        PRP$\n",
      "          their\n",
      "        NNS\n",
      "          families\n",
      "    .\n",
      "      .\n",
      "Sentence:\n",
      "The horse raced past the barn fell.\n",
      "\n",
      "Parse tree:\n",
      "ROOT\n",
      "  S\n",
      "    NP\n",
      "      DT\n",
      "        The\n",
      "      NN\n",
      "        horse\n",
      "    VP\n",
      "      VBD\n",
      "        raced\n",
      "      SBAR\n",
      "        S\n",
      "          NP\n",
      "            IN\n",
      "              past\n",
      "            DT\n",
      "              the\n",
      "            NN\n",
      "              barn\n",
      "          VP\n",
      "            VBD\n",
      "              fell\n",
      "    .\n",
      "      .\n",
      "Sentence:\n",
      "The old man the boat.\n",
      "\n",
      "Parse tree:\n",
      "ROOT\n",
      "  NP\n",
      "    NP\n",
      "      DT\n",
      "        The\n",
      "      JJ\n",
      "        old\n",
      "      NN\n",
      "        man\n",
      "    NP\n",
      "      DT\n",
      "        the\n",
      "      NN\n",
      "        boat\n",
      "    .\n",
      "      .\n"
     ]
    }
   ],
   "source": [
    "gardenPathSentences.foreach { sent => \n",
    "  println(\"Sentence:\")\n",
    "  println(sent)\n",
    "  println()\n",
    "  println(\"Parse tree:\")\n",
    "  sent.parse.indentedListPrint\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Scala",
   "language": "scala",
   "name": "scala"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".scala",
   "mimetype": "text/x-scala",
   "name": "scala211",
   "nbconvert_exporter": "script",
   "pygments_lexer": "scala",
   "version": "2.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}