{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# How to preprocess xml with add/del substs, but without the subst tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "using HyperCollate"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## example 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"<p>De te streng doorgedreven rationalisatie van zijn prinsenjeugd had dit \\n<del>met <subst><del>hem</del><add>zich</add></subst></del>\\nmeegebracht.</p>\""
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xml = \"\"\"\n",
    "<p>De te streng doorgedreven rationalisatie van zijn prinsenjeugd had dit \n",
    "<del>met <del>hem</del><add>zich</add></del>\n",
    "meegebracht.</p>\n",
    "\"\"\"\n",
    "\n",
    "processed_xml = add_subst(xml)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
       " -->\r\n",
       "<!-- Title: VariantGraph Pages: 1 -->\r\n",
       "<svg width=\"877pt\" height=\"126pt\"\r\n",
       " viewBox=\"0.00 0.00 877.00 126.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 122)\">\r\n",
       "<title>VariantGraph</title>\r\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-122 873,-122 873,4 -4,4\"/>\r\n",
       "<!-- v1 -->\r\n",
       "<g id=\"node1\" class=\"node\"><title>v1</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"402,-41 0,-41 0,-5 402,-5 402,-41\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"201\" y=\"-19.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">De te streng doorgedreven rationalisatie van zijn prinsenjeugd had dit </text>\r\n",
       "</g>\r\n",
       "<!-- v2 -->\r\n",
       "<g id=\"node2\" class=\"node\"><title>v2</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"440\" cy=\"-23\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v1&#45;&gt;v2 -->\r\n",
       "<g id=\"edge1\" class=\"edge\"><title>v1&#45;&gt;v2</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M402.229,-23C415.616,-23 426.289,-23 432.617,-23\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"432.952,-24.7501 437.951,-23 432.951,-21.2501 432.952,-24.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v3 -->\r\n",
       "<g id=\"node3\" class=\"node\"><title>v3</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"532,-64 478,-64 478,-28 532,-28 532,-64\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"505\" y=\"-42.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">met </text>\r\n",
       "</g>\r\n",
       "<!-- v2&#45;&gt;v3 -->\r\n",
       "<g id=\"edge2\" class=\"edge\"><title>v2&#45;&gt;v3</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M442.093,-23.3992C445.964,-24.8124 459.507,-29.7566 472.943,-34.6616\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"472.573,-36.3894 477.869,-36.4603 473.773,-33.1017 472.573,-36.3894\"/>\r\n",
       "</g>\r\n",
       "<!-- v8 -->\r\n",
       "<g id=\"node8\" class=\"node\"><title>v8</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"740\" cy=\"-23\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v2&#45;&gt;v8 -->\r\n",
       "<g id=\"edge3\" class=\"edge\"><title>v2&#45;&gt;v8</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M442.012,-22.6749C450.811,-19.8793 515.264,-3.55271e-015 569,-0 569,-0 569,-0 636,-0 673.836,-3.55271e-015 717.651,-14.9654 733.18,-20.7528\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"732.709,-22.4462 738.004,-22.6008 733.961,-19.1778 732.709,-22.4462\"/>\r\n",
       "</g>\r\n",
       "<!-- v4 -->\r\n",
       "<g id=\"node4\" class=\"node\"><title>v4</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"570\" cy=\"-46\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v3&#45;&gt;v4 -->\r\n",
       "<g id=\"edge4\" class=\"edge\"><title>v3&#45;&gt;v4</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M532.353,-46C543.48,-46 555.487,-46 562.618,-46\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"562.91,-47.7501 567.91,-46 562.91,-44.2501 562.91,-47.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v5 -->\r\n",
       "<g id=\"node5\" class=\"node\"><title>v5</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"662,-118 608,-118 608,-82 662,-82 662,-118\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"635\" y=\"-96.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">hem</text>\r\n",
       "</g>\r\n",
       "<!-- v4&#45;&gt;v5 -->\r\n",
       "<g id=\"edge5\" class=\"edge\"><title>v4&#45;&gt;v5</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M572.093,-46.9373C576.552,-50.7589 593.842,-65.5792 609.02,-78.589\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"607.89,-79.9245 612.825,-81.8498 610.167,-77.2671 607.89,-79.9245\"/>\r\n",
       "</g>\r\n",
       "<!-- v6 -->\r\n",
       "<g id=\"node6\" class=\"node\"><title>v6</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"662,-64 608,-64 608,-28 662,-28 662,-64\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"635\" y=\"-42.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">zich</text>\r\n",
       "</g>\r\n",
       "<!-- v4&#45;&gt;v6 -->\r\n",
       "<g id=\"edge6\" class=\"edge\"><title>v4&#45;&gt;v6</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M572.093,-46C575.93,-46 589.27,-46 602.589,-46\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"602.87,-47.7501 607.869,-46 602.869,-44.2501 602.87,-47.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v7 -->\r\n",
       "<g id=\"node7\" class=\"node\"><title>v7</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"700\" cy=\"-46\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v5&#45;&gt;v7 -->\r\n",
       "<g id=\"edge7\" class=\"edge\"><title>v5&#45;&gt;v7</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M657.329,-81.7181C670.323,-70.5806 685.903,-57.2262 693.824,-50.4363\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"695.302,-51.4749 697.959,-46.8922 693.024,-48.8175 695.302,-51.4749\"/>\r\n",
       "</g>\r\n",
       "<!-- v6&#45;&gt;v7 -->\r\n",
       "<g id=\"edge8\" class=\"edge\"><title>v6&#45;&gt;v7</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M662.353,-46C673.48,-46 685.487,-46 692.618,-46\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"692.91,-47.7501 697.91,-46 692.91,-44.2501 692.91,-47.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v7&#45;&gt;v8 -->\r\n",
       "<g id=\"edge9\" class=\"edge\"><title>v7&#45;&gt;v8</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M701.939,-45.4315C706.22,-42.8408 724.066,-32.0389 733.343,-26.4239\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"734.276,-27.9048 737.648,-23.8186 732.464,-24.9105 734.276,-27.9048\"/>\r\n",
       "</g>\r\n",
       "<!-- v9 -->\r\n",
       "<g id=\"node9\" class=\"node\"><title>v9</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"869,-42.5 778,-42.5 778,-3.5 869,-3.5 869,-42.5\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"823.5\" y=\"-11.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">meegebracht.</text>\r\n",
       "</g>\r\n",
       "<!-- v8&#45;&gt;v9 -->\r\n",
       "<g id=\"edge10\" class=\"edge\"><title>v8&#45;&gt;v9</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M742.152,-23C745.909,-23 758.37,-23 772.395,-23\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"772.619,-24.7501 777.619,-23 772.619,-21.2501 772.619,-24.7501\"/>\r\n",
       "</g>\r\n",
       "</g>\r\n",
       "</svg>\r\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_svg(to_graph(processed_xml))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## example 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"<xml><s>...weinig van pas komen\\n<del hand=\\\"#RB\\\" rend=\\\"grey pencil\\\" resp=\\\"#EB\\\" type=\\\"crossedOut\\\">zoo, o.m. in de sexueele opvoeding van den\\ntroo<subst><del hand=\\\"#RB\\\" rend=\\\"grey pencil\\\" resp=\\\"#EB\\\" type=\\\"crossedOut\\\">p</del><add place=\\\"supralinear\\\" hand=\\\"#RB\\\" rend=\\\"grey pencil\\\" resp=\\\"#EB\\\">n</add></subst>o\\n<add place=\\\"supralinear\\\" hand=\\\"#RB\\\" rend=\\\"grey pencil\\\" resp=\\\"#EB\\\">p</add>volger...</del></s></xml>\""
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xml = \"\"\"\n",
    "<xml><s>...weinig van pas komen\n",
    "<del type=\"crossedOut\" rend=\"grey pencil\" hand=\"#RB\" resp=\"#EB\">zoo, o.m. in de sexueele opvoeding van den\n",
    "troo<del type=\"crossedOut\" rend=\"grey pencil\" hand=\"#RB\" resp=\"#EB\">p</del>\n",
    "<add place=\"supralinear\" hand=\"#RB\" rend=\"grey pencil\" resp=\"#EB\">n</add>o\n",
    "<add place=\"supralinear\" rend=\"grey pencil\" hand=\"#RB\" resp=\"#EB\">p</add>volger...</del></s></xml>\n",
    "\"\"\"\n",
    "\n",
    "processed_xml = add_subst(xml)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
       " -->\r\n",
       "<!-- Title: VariantGraph Pages: 1 -->\r\n",
       "<svg width=\"1065pt\" height=\"127pt\"\r\n",
       " viewBox=\"0.00 0.00 1065.00 127.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 123)\">\r\n",
       "<title>VariantGraph</title>\r\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-123 1061,-123 1061,4 -4,4\"/>\r\n",
       "<!-- v1 -->\r\n",
       "<g id=\"node1\" class=\"node\"><title>v1</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"151,-41 0,-41 0,-5 151,-5 151,-41\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"75.5\" y=\"-19.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">...weinig van pas komen</text>\r\n",
       "</g>\r\n",
       "<!-- v2 -->\r\n",
       "<g id=\"node2\" class=\"node\"><title>v2</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"189\" cy=\"-23\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v1&#45;&gt;v2 -->\r\n",
       "<g id=\"edge1\" class=\"edge\"><title>v1&#45;&gt;v2</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M151.182,-23C163.767,-23 174.892,-23 181.534,-23\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"181.839,-24.7501 186.839,-23 181.839,-21.2501 181.839,-24.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v3 -->\r\n",
       "<g id=\"node3\" class=\"node\"><title>v3</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"489,-66 227,-66 227,-28 489,-28 489,-66\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"358\" y=\"-50.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">zoo, o.m. in de sexueele opvoeding van den</text>\r\n",
       "<text text-anchor=\"middle\" x=\"358\" y=\"-35.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">troo</text>\r\n",
       "</g>\r\n",
       "<!-- v2&#45;&gt;v3 -->\r\n",
       "<g id=\"edge2\" class=\"edge\"><title>v2&#45;&gt;v3</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M191.066,-23.1533C194.541,-23.6525 206.218,-25.3307 221.883,-27.582\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"221.67,-29.3193 226.869,-28.2985 222.168,-25.8549 221.67,-29.3193\"/>\r\n",
       "</g>\r\n",
       "<!-- v13 -->\r\n",
       "<g id=\"node13\" class=\"node\"><title>v13</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"1055\" cy=\"-23\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v2&#45;&gt;v13 -->\r\n",
       "<g id=\"edge3\" class=\"edge\"><title>v2&#45;&gt;v13</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M191.205,-22.8531C207.387,-20.8943 383.334,-1.06581e-014 526,-0 526,-0 526,-0 918,-0 968.759,-3.55271e-015 1028.7,-15.7685 1047.82,-21.1928\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"1047.63,-22.9611 1052.92,-22.6749 1048.61,-19.5999 1047.63,-22.9611\"/>\r\n",
       "</g>\r\n",
       "<!-- v4 -->\r\n",
       "<g id=\"node4\" class=\"node\"><title>v4</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"527\" cy=\"-47\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v3&#45;&gt;v4 -->\r\n",
       "<g id=\"edge4\" class=\"edge\"><title>v3&#45;&gt;v4</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M489.351,-47C502.576,-47 513.483,-47 519.867,-47\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"519.914,-48.7501 524.914,-47 519.914,-45.2501 519.914,-48.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v5 -->\r\n",
       "<g id=\"node5\" class=\"node\"><title>v5</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"619,-119 565,-119 565,-83 619,-83 619,-119\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"592\" y=\"-97.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">p</text>\r\n",
       "</g>\r\n",
       "<!-- v4&#45;&gt;v5 -->\r\n",
       "<g id=\"edge5\" class=\"edge\"><title>v4&#45;&gt;v5</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M529.093,-47.9373C533.552,-51.7589 550.842,-66.5792 566.02,-79.589\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"564.89,-80.9245 569.825,-82.8498 567.167,-78.2671 564.89,-80.9245\"/>\r\n",
       "</g>\r\n",
       "<!-- v6 -->\r\n",
       "<g id=\"node6\" class=\"node\"><title>v6</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"619,-65 565,-65 565,-29 619,-29 619,-65\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"592\" y=\"-43.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">n</text>\r\n",
       "</g>\r\n",
       "<!-- v4&#45;&gt;v6 -->\r\n",
       "<g id=\"edge6\" class=\"edge\"><title>v4&#45;&gt;v6</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M529.093,-47C532.93,-47 546.27,-47 559.589,-47\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"559.87,-48.7501 564.869,-47 559.869,-45.2501 559.87,-48.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v7 -->\r\n",
       "<g id=\"node7\" class=\"node\"><title>v7</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"657\" cy=\"-69\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v5&#45;&gt;v7 -->\r\n",
       "<g id=\"edge7\" class=\"edge\"><title>v5&#45;&gt;v7</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M619.353,-87.6145C630.711,-81.845 642.988,-75.6093 650.057,-72.0185\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"651.244,-73.3784 654.91,-69.5537 649.659,-70.2578 651.244,-73.3784\"/>\r\n",
       "</g>\r\n",
       "<!-- v6&#45;&gt;v7 -->\r\n",
       "<g id=\"edge8\" class=\"edge\"><title>v6&#45;&gt;v7</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M619.353,-56.2025C630.711,-60.1691 642.988,-64.4561 650.057,-66.9248\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"649.612,-68.623 654.91,-68.6193 650.766,-65.3187 649.612,-68.623\"/>\r\n",
       "</g>\r\n",
       "<!-- v8 -->\r\n",
       "<g id=\"node8\" class=\"node\"><title>v8</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"749,-87 695,-87 695,-51 749,-51 749,-87\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"722\" y=\"-65.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">o</text>\r\n",
       "</g>\r\n",
       "<!-- v7&#45;&gt;v8 -->\r\n",
       "<g id=\"edge9\" class=\"edge\"><title>v7&#45;&gt;v8</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M659.093,-69C662.93,-69 676.27,-69 689.589,-69\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"689.87,-70.7501 694.869,-69 689.869,-67.2501 689.87,-70.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v9 -->\r\n",
       "<g id=\"node9\" class=\"node\"><title>v9</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"787\" cy=\"-69\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v8&#45;&gt;v9 -->\r\n",
       "<g id=\"edge10\" class=\"edge\"><title>v8&#45;&gt;v9</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M749.353,-69C760.48,-69 772.487,-69 779.618,-69\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"779.91,-70.7501 784.91,-69 779.91,-67.2501 779.91,-70.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v10 -->\r\n",
       "<g id=\"node10\" class=\"node\"><title>v10</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"879,-106 825,-106 825,-70 879,-70 879,-106\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"852\" y=\"-84.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">p</text>\r\n",
       "</g>\r\n",
       "<!-- v9&#45;&gt;v10 -->\r\n",
       "<g id=\"edge11\" class=\"edge\"><title>v9&#45;&gt;v10</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M789.093,-69.3298C792.964,-70.4972 806.507,-74.5815 819.943,-78.6335\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"819.577,-80.3511 824.869,-80.1194 820.588,-77.0001 819.577,-80.3511\"/>\r\n",
       "</g>\r\n",
       "<!-- v11 -->\r\n",
       "<g id=\"node11\" class=\"node\"><title>v11</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"917\" cy=\"-46\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v9&#45;&gt;v11 -->\r\n",
       "<g id=\"edge12\" class=\"edge\"><title>v9&#45;&gt;v11</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M789.147,-68.7303C793.68,-67.6688 810.797,-63.7044 825,-61 856.645,-54.9746 894.755,-49.1555 909.623,-46.9398\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"910.022,-48.6499 914.713,-46.1876 909.511,-45.1876 910.022,-48.6499\"/>\r\n",
       "</g>\r\n",
       "<!-- v10&#45;&gt;v11 -->\r\n",
       "<g id=\"edge13\" class=\"edge\"><title>v10&#45;&gt;v11</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M879.353,-70.4316C890.943,-62.7045 903.489,-54.3404 910.484,-49.6776\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"911.72,-50.9564 914.91,-46.7268 909.779,-48.0442 911.72,-50.9564\"/>\r\n",
       "</g>\r\n",
       "<!-- v12 -->\r\n",
       "<g id=\"node12\" class=\"node\"><title>v12</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1017,-64 955,-64 955,-28 1017,-28 1017,-64\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"986\" y=\"-42.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">volger...</text>\r\n",
       "</g>\r\n",
       "<!-- v11&#45;&gt;v12 -->\r\n",
       "<g id=\"edge14\" class=\"edge\"><title>v11&#45;&gt;v12</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M919.163,-46C923.029,-46 935.965,-46 949.359,-46\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"949.698,-47.7501 954.698,-46 949.698,-44.2501 949.698,-47.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v12&#45;&gt;v13 -->\r\n",
       "<g id=\"edge15\" class=\"edge\"><title>v12&#45;&gt;v13</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M1017.36,-35.5767C1029.03,-31.5727 1041.2,-27.3931 1048.17,-25.0023\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"1048.77,-26.6446 1052.93,-23.3659 1047.64,-23.3342 1048.77,-26.6446\"/>\r\n",
       "</g>\r\n",
       "</g>\r\n",
       "</svg>\r\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_svg(to_graph(processed_xml))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## example 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"<xml>Hoe zoet moet nochtans zijn dit <subst><del>werven om</del><add>trachten naar</add></subst> een vrouw,\\nde ongewisheid vóór de liefelijke toestemming!</xml>\""
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xml = \"\"\"\n",
    "<xml>Hoe zoet moet nochtans zijn dit <del>werven om</del><add>trachten naar</add> een vrouw,\n",
    "de ongewisheid vóór de liefelijke toestemming!</xml>\n",
    "\"\"\"\n",
    "\n",
    "processed_xml = add_subst(xml)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
       " -->\r\n",
       "<!-- Title: VariantGraph Pages: 1 -->\r\n",
       "<svg width=\"722pt\" height=\"98pt\"\r\n",
       " viewBox=\"0.00 0.00 722.00 98.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 94)\">\r\n",
       "<title>VariantGraph</title>\r\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-94 718,-94 718,4 -4,4\"/>\r\n",
       "<!-- v1 -->\r\n",
       "<g id=\"node1\" class=\"node\"><title>v1</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"197,-63 -1.42109e-014,-63 -1.42109e-014,-27 197,-27 197,-63\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"98.5\" y=\"-41.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">Hoe zoet moet nochtans zijn dit </text>\r\n",
       "</g>\r\n",
       "<!-- v2 -->\r\n",
       "<g id=\"node2\" class=\"node\"><title>v2</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"235\" cy=\"-45\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v1&#45;&gt;v2 -->\r\n",
       "<g id=\"edge1\" class=\"edge\"><title>v1&#45;&gt;v2</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M197.247,-45C210.173,-45 221.191,-45 227.703,-45\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"227.878,-46.7501 232.878,-45 227.878,-43.2501 227.878,-46.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v3 -->\r\n",
       "<g id=\"node3\" class=\"node\"><title>v3</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"356,-90 278,-90 278,-54 356,-54 356,-90\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"317\" y=\"-68.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">werven om</text>\r\n",
       "</g>\r\n",
       "<!-- v2&#45;&gt;v3 -->\r\n",
       "<g id=\"edge2\" class=\"edge\"><title>v2&#45;&gt;v3</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M237.131,-45.3817C241.33,-46.799 256.613,-51.9571 272.623,-57.3603\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"272.307,-59.1004 277.604,-59.0412 273.426,-55.7842 272.307,-59.1004\"/>\r\n",
       "</g>\r\n",
       "<!-- v4 -->\r\n",
       "<g id=\"node4\" class=\"node\"><title>v4</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"361,-36 273,-36 273,-0 361,-0 361,-36\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"317\" y=\"-14.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">trachten naar</text>\r\n",
       "</g>\r\n",
       "<!-- v2&#45;&gt;v4 -->\r\n",
       "<g id=\"edge3\" class=\"edge\"><title>v2&#45;&gt;v4</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M237.131,-44.6183C240.919,-43.3399 253.724,-39.0181 267.947,-34.218\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"268.635,-35.8326 272.813,-32.5755 267.516,-32.5164 268.635,-35.8326\"/>\r\n",
       "</g>\r\n",
       "<!-- v5 -->\r\n",
       "<g id=\"node5\" class=\"node\"><title>v5</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"399\" cy=\"-45\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v3&#45;&gt;v5 -->\r\n",
       "<g id=\"edge4\" class=\"edge\"><title>v3&#45;&gt;v5</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M356.126,-59.1326C369.959,-54.4637 384.093,-49.6937 391.888,-47.0628\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"392.621,-48.6624 396.799,-45.4053 391.502,-45.3461 392.621,-48.6624\"/>\r\n",
       "</g>\r\n",
       "<!-- v4&#45;&gt;v5 -->\r\n",
       "<g id=\"edge5\" class=\"edge\"><title>v4&#45;&gt;v5</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M361.278,-32.6063C373.427,-36.7067 385.17,-40.67 391.984,-42.9696\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"391.69,-44.7172 396.987,-44.658 392.809,-41.4009 391.69,-44.7172\"/>\r\n",
       "</g>\r\n",
       "<!-- v6 -->\r\n",
       "<g id=\"node6\" class=\"node\"><title>v6</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"714,-64 437,-64 437,-26 714,-26 714,-64\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"575.5\" y=\"-48.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\"> een vrouw,</text>\r\n",
       "<text text-anchor=\"middle\" x=\"575.5\" y=\"-33.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">de ongewisheid vóór de liefelijke toestemming!</text>\r\n",
       "</g>\r\n",
       "<!-- v5&#45;&gt;v6 -->\r\n",
       "<g id=\"edge6\" class=\"edge\"><title>v5&#45;&gt;v6</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M401.114,-45C404.609,-45 416.046,-45 431.498,-45\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"431.887,-46.7501 436.887,-45 431.887,-43.2501 431.887,-46.7501\"/>\r\n",
       "</g>\r\n",
       "</g>\r\n",
       "</svg>\r\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_svg(to_graph(processed_xml))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## example 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"<x>something <subst><del>not this</del><del>or this</del><add>but this</add></subst> something else</x>\""
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xml = \"<x>something <del>not this</del><del>or this</del><add>but this</add> something else</x>\"\n",
    "\n",
    "processed_xml = add_subst(xml)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
       " -->\r\n",
       "<!-- Title: VariantGraph Pages: 1 -->\r\n",
       "<svg width=\"397pt\" height=\"152pt\"\r\n",
       " viewBox=\"0.00 0.00 397.00 152.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 148)\">\r\n",
       "<title>VariantGraph</title>\r\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-148 393,-148 393,4 -4,4\"/>\r\n",
       "<!-- v1 -->\r\n",
       "<g id=\"node1\" class=\"node\"><title>v1</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"77,-90 0,-90 0,-54 77,-54 77,-90\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"38.5\" y=\"-68.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">something </text>\r\n",
       "</g>\r\n",
       "<!-- v2 -->\r\n",
       "<g id=\"node2\" class=\"node\"><title>v2</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"115\" cy=\"-72\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v1&#45;&gt;v2 -->\r\n",
       "<g id=\"edge1\" class=\"edge\"><title>v1&#45;&gt;v2</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M77.1865,-72C89.0964,-72 100.947,-72 107.867,-72\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"107.963,-73.7501 112.963,-72 107.963,-70.2501 107.963,-73.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v3 -->\r\n",
       "<g id=\"node3\" class=\"node\"><title>v3</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"211,-144 153,-144 153,-108 211,-108 211,-144\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"182\" y=\"-122.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">not this</text>\r\n",
       "</g>\r\n",
       "<!-- v2&#45;&gt;v3 -->\r\n",
       "<g id=\"edge2\" class=\"edge\"><title>v2&#45;&gt;v3</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M117.128,-72.9373C121.728,-76.7589 139.568,-91.5792 155.227,-104.589\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"154.188,-106.001 159.152,-107.85 156.425,-103.309 154.188,-106.001\"/>\r\n",
       "</g>\r\n",
       "<!-- v4 -->\r\n",
       "<g id=\"node4\" class=\"node\"><title>v4</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"209,-90 155,-90 155,-54 209,-54 209,-90\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"182\" y=\"-68.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">or this</text>\r\n",
       "</g>\r\n",
       "<!-- v2&#45;&gt;v4 -->\r\n",
       "<g id=\"edge3\" class=\"edge\"><title>v2&#45;&gt;v4</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M117.128,-72C121.182,-72 135.517,-72 149.582,-72\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"149.732,-73.7501 154.732,-72 149.732,-70.2501 149.732,-73.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v5 -->\r\n",
       "<g id=\"node5\" class=\"node\"><title>v5</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"211,-36 153,-36 153,-0 211,-0 211,-36\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"182\" y=\"-14.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">but this</text>\r\n",
       "</g>\r\n",
       "<!-- v2&#45;&gt;v5 -->\r\n",
       "<g id=\"edge4\" class=\"edge\"><title>v2&#45;&gt;v5</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M117.128,-71.0627C121.728,-67.2411 139.568,-52.4208 155.227,-39.411\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"156.425,-40.6915 159.152,-36.1502 154.188,-37.9993 156.425,-40.6915\"/>\r\n",
       "</g>\r\n",
       "<!-- v6 -->\r\n",
       "<g id=\"node6\" class=\"node\"><title>v6</title>\r\n",
       "<ellipse fill=\"none\" stroke=\"black\" cx=\"249\" cy=\"-72\" rx=\"2\" ry=\"2\"/>\r\n",
       "</g>\r\n",
       "<!-- v3&#45;&gt;v6 -->\r\n",
       "<g id=\"edge5\" class=\"edge\"><title>v3&#45;&gt;v6</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M205.006,-107.718C218.412,-96.5806 234.487,-83.2262 242.66,-76.4363\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"244.198,-77.4334 246.926,-72.8922 241.962,-74.7413 244.198,-77.4334\"/>\r\n",
       "</g>\r\n",
       "<!-- v4&#45;&gt;v6 -->\r\n",
       "<g id=\"edge6\" class=\"edge\"><title>v4&#45;&gt;v6</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M209.063,-72C220.962,-72 234.059,-72 241.626,-72\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"241.829,-73.7501 246.829,-72 241.829,-70.2501 241.829,-73.7501\"/>\r\n",
       "</g>\r\n",
       "<!-- v5&#45;&gt;v6 -->\r\n",
       "<g id=\"edge7\" class=\"edge\"><title>v5&#45;&gt;v6</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M205.006,-36.2819C218.412,-47.4194 234.487,-60.7738 242.66,-67.5637\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"241.962,-69.2587 246.926,-71.1078 244.198,-66.5666 241.962,-69.2587\"/>\r\n",
       "</g>\r\n",
       "<!-- v7 -->\r\n",
       "<g id=\"node7\" class=\"node\"><title>v7</title>\r\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"389,-90 287,-90 287,-54 389,-54 389,-90\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"338\" y=\"-68.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\"> something else</text>\r\n",
       "</g>\r\n",
       "<!-- v6&#45;&gt;v7 -->\r\n",
       "<g id=\"edge8\" class=\"edge\"><title>v6&#45;&gt;v7</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M251.23,-72C255.056,-72 267.341,-72 281.464,-72\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"281.738,-73.7501 286.738,-72 281.738,-70.2501 281.738,-73.7501\"/>\r\n",
       "</g>\r\n",
       "</g>\r\n",
       "</svg>\r\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_svg(to_graph(processed_xml))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 1.1.0",
   "language": "julia",
   "name": "julia-1.1"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.1.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}