{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# How to preprocess XML with add/del substitutions that lack an enclosing subst tag" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "using HyperCollate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## example 1" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"

De te streng doorgedreven rationalisatie van zijn prinsenjeugd had dit \\nmet hemzich\\nmeegebracht.

\"" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xml = \"\"\"\n", "

De te streng doorgedreven rationalisatie van zijn prinsenjeugd had dit \n", "met hemzich\n", "meegebracht.

\n", "\"\"\"\n", "\n", "processed_xml = add_subst(xml)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "VariantGraph\r\n", "\r\n", "\r\n", "v1\r\n", "\r\n", "De te streng doorgedreven rationalisatie van zijn prinsenjeugd had dit \r\n", "\r\n", "\r\n", "v2\r\n", "\r\n", "\r\n", "\r\n", "v1->v2\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v3\r\n", "\r\n", "met \r\n", "\r\n", "\r\n", "v2->v3\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v8\r\n", "\r\n", "\r\n", "\r\n", "v2->v8\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v4\r\n", "\r\n", "\r\n", "\r\n", "v3->v4\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v5\r\n", "\r\n", "hem\r\n", "\r\n", "\r\n", "v4->v5\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v6\r\n", "\r\n", "zich\r\n", "\r\n", "\r\n", "v4->v6\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v7\r\n", "\r\n", "\r\n", "\r\n", "v5->v7\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v6->v7\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v7->v8\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v9\r\n", "\r\n", "meegebracht.\r\n", "\r\n", "\r\n", "v8->v9\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_svg(to_graph(processed_xml))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## example 2" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"...weinig van pas komen\\nzoo, o.m. in de sexueele opvoeding van den\\ntroopno\\npvolger...\"" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xml = \"\"\"\n", "...weinig van pas komen\n", "zoo, o.m. 
in de sexueele opvoeding van den\n", "troop\n", "no\n", "pvolger...\n", "\"\"\"\n", "\n", "processed_xml = add_subst(xml)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "VariantGraph\r\n", "\r\n", "\r\n", "v1\r\n", "\r\n", "...weinig van pas komen\r\n", "\r\n", "\r\n", "v2\r\n", "\r\n", "\r\n", "\r\n", "v1->v2\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v3\r\n", "\r\n", "zoo, o.m. in de sexueele opvoeding van den\r\n", "troo\r\n", "\r\n", "\r\n", "v2->v3\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v13\r\n", "\r\n", "\r\n", "\r\n", "v2->v13\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v4\r\n", "\r\n", "\r\n", "\r\n", "v3->v4\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v5\r\n", "\r\n", "p\r\n", "\r\n", "\r\n", "v4->v5\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v6\r\n", "\r\n", "n\r\n", "\r\n", "\r\n", "v4->v6\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v7\r\n", "\r\n", "\r\n", "\r\n", "v5->v7\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v6->v7\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v8\r\n", "\r\n", "o\r\n", "\r\n", "\r\n", "v7->v8\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v9\r\n", "\r\n", "\r\n", "\r\n", "v8->v9\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v10\r\n", "\r\n", "p\r\n", "\r\n", "\r\n", "v9->v10\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v11\r\n", "\r\n", "\r\n", "\r\n", "v9->v11\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v10->v11\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v12\r\n", "\r\n", "volger...\r\n", "\r\n", "\r\n", "v11->v12\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v12->v13\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_svg(to_graph(processed_xml))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## example 3" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"Hoe zoet moet nochtans zijn dit werven omtrachten naar een vrouw,\\nde ongewisheid vóór de 
liefelijke toestemming!\"" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xml = \"\"\"\n", "Hoe zoet moet nochtans zijn dit werven omtrachten naar een vrouw,\n", "de ongewisheid vóór de liefelijke toestemming!\n", "\"\"\"\n", "\n", "processed_xml = add_subst(xml)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "VariantGraph\r\n", "\r\n", "\r\n", "v1\r\n", "\r\n", "Hoe zoet moet nochtans zijn dit \r\n", "\r\n", "\r\n", "v2\r\n", "\r\n", "\r\n", "\r\n", "v1->v2\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v3\r\n", "\r\n", "werven om\r\n", "\r\n", "\r\n", "v2->v3\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v4\r\n", "\r\n", "trachten naar\r\n", "\r\n", "\r\n", "v2->v4\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v5\r\n", "\r\n", "\r\n", "\r\n", "v3->v5\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v4->v5\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v6\r\n", "\r\n", " een vrouw,\r\n", "de ongewisheid vóór de liefelijke toestemming!\r\n", "\r\n", "\r\n", "v5->v6\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_svg(to_graph(processed_xml))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## example 4" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"something not thisor thisbut this something else\"" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xml = \"something not thisor thisbut this something else\"\n", "\n", "processed_xml = add_subst(xml)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "VariantGraph\r\n", "\r\n", "\r\n", "v1\r\n", "\r\n", "something \r\n", "\r\n", "\r\n", "v2\r\n", "\r\n", "\r\n", "\r\n", "v1->v2\r\n", "\r\n", "\r\n", "\r\n", 
"\r\n", "v3\r\n", "\r\n", "not this\r\n", "\r\n", "\r\n", "v2->v3\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v4\r\n", "\r\n", "or this\r\n", "\r\n", "\r\n", "v2->v4\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v5\r\n", "\r\n", "but this\r\n", "\r\n", "\r\n", "v2->v5\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v6\r\n", "\r\n", "\r\n", "\r\n", "v3->v6\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v4->v6\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v5->v6\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "v7\r\n", "\r\n", " something else\r\n", "\r\n", "\r\n", "v6->v7\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_svg(to_graph(processed_xml))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Julia 1.1.0", "language": "julia", "name": "julia-1.1" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "1.1.0" } }, "nbformat": 4, "nbformat_minor": 2 }