{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "source": [ "%pylab inline\n", "import time\n", "from collections import defaultdict\n", "import json\n", "import pandas as pd\n", "import os\n", "import sys\n",
"# Python 2 only: make UTF-8 the default str/unicode codec. sys.stdout is saved and\n",
"# restored around reload(sys) so the notebook keeps using its own stdout object.\n",
"stdout = sys.stdout\n", "reload(sys)\n", "sys.setdefaultencoding('utf-8')\n", "sys.stdout = stdout\n", "\n", "\n", "\n", "import numpy as np\n", "import requests\n", "import json \n", "import datetime\n", "\n", "\n", "from textblob import TextBlob" ] },
{ "cell_type": "code", "execution_count": 185, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "262\n" ] } ], "source": [
"# Load the seed set (iterated in the \"Awarded scientists\" section) and report its size.\n",
"with open(\"seed_creation_date.json\", \"r\") as f:\n",
"    sci_aw_dict = json.load(f, encoding=\"utf-8\")\n",
"# Count the dict that was just loaded, so this cell also runs on a fresh kernel.\n",
"print(len(sci_aw_dict))" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "276\n" ] } ], "source": [
"# Load the baseline set (iterated in the \"Non awarded scientists\" section) and report its size.\n",
"with open(\"baseline_creation_date.json\", \"r\") as f:\n",
"    sci_nonaw_dict = json.load(f, encoding=\"utf-8\")\n",
"print(len(sci_nonaw_dict))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#https://stackoverflow.com/questions/34411896/how-to-get-full-wikipedia-revision-history-list-from-some-article\n", "\n", "\n", "\n", "http://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=json&&titles=Akiko_Iwasaki\n", " \n", " \n", "https://en.wikipedia.org/w/api.php?action=query&format=json&titles=Bla_Bla_Bla&prop=extracts&exintro&explaintext\n", " \n", " exintro - parse first paragraph" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Get all revisions of Wikipedia articles" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Non awarded scientists" ] }, { "cell_type": "code", "execution_count": 159, "metadata": {}, "outputs": [], "source": [ "#pip 
install pywikibot\n", "# then create a file user-config.py containing the following lines:\n", "#\n", "## -*- coding: utf-8 -*-\n", "#family = 'wikipedia'\n", "#mylang = 'en'\n", "#usernames['wikipedia']['en'] = u'RevisionsBot'\n", "#\n", "#\n", "#\n", "#https://doc.wikimedia.org/pywikibot/api_ref/pywikibot.html#module-pywikibot.page\n", "\n", "import pywikibot\n",
"# English Wikipedia site object; the revision download loops pass it to pywikibot.Page.\n",
"site = pywikibot.Site(\"en\", \"wikipedia\")" ] },
{ "cell_type": "code", "execution_count": 160, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 G%C3%A1bor_A._Somorjai\n", "2 Elias_James_Corey\n", "3 Albert_Meyers\n", "4 Dan_Boneh\n", "5 Shlomo_Shamai\n", "6 Xavier_Gabaix\n", "7 Marianne_Bertrand\n", "8 Malcolm_Green_(chemist)\n", "9 Subir_Sachdev\n", "10 Jing_Li_(chemist)\n", "11 Steven_Kaplan_(economist)\n", "12 John_Perdew\n", "13 Arun_Majumdar\n", "14 Giuseppe_Mingione\n", "15 Menachem_Elimelech\n", "16 Juan_Mart%C3%ADn_Maldacena\n", "17 Theodore_H._Geballe\n", "18 K%C3%A1ri_Stef%C3%A1nsson\n", "19 Ian_Ford\n", "20 Stephen_R._Bloom\n", "21 Andrei_Shleifer\n", "22 Ralph_Weissleder\n", "23 Axel_D._Becke\n", "24 John_Robertson_(physicist)\n", "25 Matja%C5%BE_Perc\n", "26 Ariel_Rubinstein\n", "27 Stephen_Vargo\n", "28 Michael_Boehnke\n", "29 Oded_Schramm\n", "30 Michael_I._Jordan\n", "31 Sergio_Ferrara\n", "32 Wayne_Fuller\n", "33 Rudolf_Grimm\n", "34 Joseph_Wang\n", "35 Immanuel_Bloch\n", "36 Andrew_Bernard\n", "37 Cumrun_Vafa\n", "38 George_Loewenstein\n", "39 Dimitri_Nanopoulos\n", "40 Joann_E._Manson\n", "Error when access: Joann_E._Manson\n", "41 Robert_J._Birgeneau\n", "42 Pauline_van_den_Driessche\n", "43 Murray_Brennan\n", "44 Ralph_Nuzzo\n", "45 Donal_Bradley\n", "46 Paul_Ridker\n", "47 Ulrike_Malmendier\n", "48 Allen_Steere\n", "49 Edward_Ott\n", "50 Alexander_Zamolodchikov\n", "51 Helmut_Schwarz\n", "52 Olivier_Blanchard\n", "53 Martin_Eichenbaum\n", "54 %C5%BDeljko_Reiner\n", "55 Jean-Michel_Sav%C3%A9ant\n", "56 Ross_Levine\n", "57 Reinhart_Ahlrichs\n", "58 Alex_Zettl\n", "59 Sendhil_Mullainathan\n", "60 Vincenzo_Balzani\n", "61 Valarie_Zeithaml\n", "62 Robert_West_(chemist)\n", "63 Rolf_Apweiler\n", "64 Bruce_D._Walker\n", "65 Peter_Gavin_Hall\n", "66 Yang_Huanming\n", "67 Tomas_H%C3%B6kfelt\n", "68 Michael_F._Lappert\n", "69 Elza_Erkip\n", "70 Guido_Imbens\n", "71 G%C3%A9rald_Bastard\n", "72 Michele_Parrinello\n", "73 Kendall_Houk\n", "74 Irun_Cohen\n", "75 Michael_L._Klein\n", "76 Robert_Peter_Gale\n", "77 Barry_Everitt\n", "78 Takuzo_Aida\n", "79 Didier_Astruc\n", "80 Peter_Knight_(scientist)\n", "81 Alan_Aderem\n", "82 Birger_Wernerfelt\n", "83 C._N._R._Rao\n", "84 Witold_Nazarewicz\n", "85 Masaru_Tomita\n", "86 Ruedi_Aebersold\n", "87 Sharon_Ann_Hunt\n", "88 John_A._List\n", "89 Stuart_A._Rice\n", "90 William_H._Miller_(chemistry)\n", "91 Sergey_Fomin\n", "92 Philip_Kim\n", "93 Keith_Olive\n", "94 Heng_Li\n", "95 Robert_Tibshirani\n", "96 Robert_H._Crabtree\n", "97 Bradley_Efron\n", "98 Michael_L._Tushman\n", "99 Tasawar_Hayat\n", "100 Stephen_J._Lippard\n" ] } ], "source": [
"# Download every revision (with its wikitext) of each baseline article.\n",
"list_rev = []    # one dict per revision, across all articles\n",
"erorr_list = []  # titles whose download raised an exception\n",
"\n",
"for j, sci_name in enumerate(sci_nonaw_dict, 1):\n",
"    name = sci_name.split(\"/\")[-1]  # last \"/\"-separated segment of the key is used as the page title\n",
"    print(\"%d %s\" % (j, name))\n",
"    try:\n",
"        page = pywikibot.Page(site, name)\n",
"        page_revs = []\n",
"        for rev in page.revisions(content=True):\n",
"            # Copy the attributes instead of adding \"article\" to rev.__dict__ itself,\n",
"            # which would modify the revision object.\n",
"            rev_dict = dict(vars(rev))\n",
"            rev_dict[\"article\"] = name\n",
"            page_revs.append(rev_dict)\n",
"    except Exception as err:\n",
"        # Exception rather than a bare except, so interrupting the kernel still stops the loop;\n",
"        # the error itself is printed so the failure can be diagnosed.\n",
"        print(\"Error when access: %s (%r)\" % (name, err))\n",
"        erorr_list.append(name)\n",
"    else:\n",
"        # Keep an article's revisions only when its whole history was read without error.\n",
"        list_rev.extend(page_revs)" ] },
{ "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[u'Joann_E._Manson']\n" ] } ], "source": [ "print(erorr_list)" ] }, { "cell_type": "code", "execution_count": 166, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 24163 entries, 0 to 24162\n", "Data columns (total 12 columns):\n", "_content_model 24163 non-null object\n", "_parent_id 24163 non-null int64\n", "_sha1 24132 
non-null object\n", "anon 24163 non-null bool\n", "article 24163 non-null object\n", "comment 24163 non-null object\n", "minor 24163 non-null bool\n", "revid 24163 non-null int64\n", "rollbacktoken 0 non-null object\n", "text 24132 non-null object\n", "timestamp 24163 non-null datetime64[ns]\n", "user 24163 non-null object\n", "dtypes: bool(2), datetime64[ns](1), int64(2), object(7)\n", "memory usage: 1.9+ MB\n" ] } ], "source": [
"# One row per downloaded revision; columns are the revision attributes plus \"article\".\n",
"df = pd.DataFrame(list_rev)\n",
"df.info()" ] },
{ "cell_type": "code", "execution_count": 179, "metadata": { "scrolled": true }, "outputs": [], "source": [
"# Collapse every run of whitespace (newlines and tabs included) in the wikitext to a\n",
"# single space; values that are not unicode strings (missing text) become None.\n",
"df[\"text\"] = df[\"text\"].apply(lambda x: \" \".join(x.split()) if isinstance(x, unicode) else None)\n",
"# The encoding is passed explicitly so that writing non-ASCII text does not depend on\n",
"# the sys.setdefaultencoding('utf-8') call in the first cell.\n",
"df.to_csv(\"baseline_revisions.csv\", sep=\"\\t\", encoding=\"utf-8\")" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Count words in revision text" ] }, { "cell_type": "code", "execution_count": 196, "metadata": { "scrolled": true }, "outputs": [], "source": [
"# Number of TextBlob word tokens in each revision; None where the revision has no text.\n",
"df[\"text_len\"] = df[\"text\"].apply(lambda x: len(TextBlob(x).words) if isinstance(x, unicode) else None)" ] }, { "cell_type": "code", "execution_count": 197, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_content_model_parent_id_sha1anonarticlecommentminorrevidrollbacktokentexttimestampusertext_len
0wikitext807253069a0d1ed8b580892cc7dabf3d770bc69d60bed5fc7FalseG%C3%A1bor_A._SomorjaiGrammar correctionTrue811195141None{{Infobox person |name = Gábor A. Somorjai |im...2017-11-20 02:30:05HandsomeMrToad1593.0
1wikitext797794813d0c871038a54950f633bb93b5f73ed1a820a7a58FalseG%C3%A1bor_A._SomorjaiRescuing 3 sources and tagging 0 as dead. #IAB...False807253069None{{Infobox person |name = Gábor A. Somorjai |im...2017-10-26 21:09:27InternetArchiveBot1594.0
\n", "
" ], "text/plain": [ " _content_model _parent_id _sha1 anon \\\n", "0 wikitext 807253069 a0d1ed8b580892cc7dabf3d770bc69d60bed5fc7 False \n", "1 wikitext 797794813 d0c871038a54950f633bb93b5f73ed1a820a7a58 False \n", "\n", " article comment \\\n", "0 G%C3%A1bor_A._Somorjai Grammar correction \n", "1 G%C3%A1bor_A._Somorjai Rescuing 3 sources and tagging 0 as dead. #IAB... \n", "\n", " minor revid rollbacktoken \\\n", "0 True 811195141 None \n", "1 False 807253069 None \n", "\n", " text timestamp \\\n", "0 {{Infobox person |name = Gábor A. Somorjai |im... 2017-11-20 02:30:05 \n", "1 {{Infobox person |name = Gábor A. Somorjai |im... 2017-10-26 21:09:27 \n", "\n", " user text_len \n", "0 HandsomeMrToad 1593.0 \n", "1 InternetArchiveBot 1594.0 " ] }, "execution_count": 197, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(2)" ] }, { "cell_type": "code", "execution_count": 200, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Write the file again so that it also contains the text_len column added above.\n",
"# The encoding is explicit so that non-ASCII text does not depend on the interpreter default.\n",
"df.to_csv(\"baseline_revisions.csv\", sep=\"\\t\", encoding=\"utf-8\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Awarded scientists" ] }, { "cell_type": "code", "execution_count": 187, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1 Michael_Grunstein\n", "2 Bernd_Giese\n", "3 John_Tate\n", "4 Nobuhiro_Kiyotaki\n", "5 Shinya_Yamanaka\n", "6 Richard_Hynes\n", "7 Mikhail_Leonidovich_Gromov\n", "8 Charles_L._Bennett\n", "9 David_Julius\n", "10 C%C3%A9dric_Villani\n", "11 Nancy_A._Moran\n", "12 Peter_Howitt_(economist)\n", "13 Steven_Van_Slyke\n", "14 Hideo_Hosono\n", "15 Barbara_Liskov\n", "16 Ching_W._Tang\n", "17 Charles_Sawyers\n", "18 Thomas_Ebbesen\n", "19 Tu_Youyou\n", "20 Irwin_M._Jacobs\n", "21 Martin_Hairer\n", "22 Jeffrey_M._Friedman\n", "23 Leslie_Lamport\n", "24 Ferenc_Krausz\n", "25 William_Nordhaus\n", "26 Paul_Corkum\n", "27 John_G._Thompson\n", "28 Jack_W._Szostak\n", "29 Bruce_Beutler\n", "30 John_Milnor\n", "31 Matthew_Rabin\n", "32 
Charles_F._Manski\n", "33 M._Hashem_Pesaran\n", "34 Robert_Lefkowitz\n", "35 Endre_Szemer%C3%A9di\n", "36 Patrick_O._Brown\n", "37 Harold_Demsetz\n", "38 Sam_Peltzman\n", "39 Aziz_Sancar\n", "40 James_Rothman\n", "41 Anne_Osborn_Krueger\n", "42 Alan_Krueger\n", "43 Tony_Atkinson\n", "44 John_B._Goodenough\n", "45 Seiji_Ogawa\n", "46 Peter_C._B._Phillips\n", "47 David_R._Smith\n", "48 Charles_L._Kane\n", "49 Ernest_McCulloch\n", "50 Eric_H._Davidson\n", "51 Jacques_Tits\n", "52 Shizuo_Akira\n", "53 John_A._List\n", "54 Paul_Alivisatos\n", "55 Victor_Ambros\n", "56 Takaaki_Kajita\n", "57 Harald_zur_Hausen\n", "58 Martin_Weitzman\n", "59 Saul_Perlmutter\n", "60 Isamu_Akasaki\n", "61 Jordi_Gal%C3%AD\n", "62 George_E._Smith\n", "63 Geoffrey_Marcy\n", "64 William_Baumol\n", "65 Vera_Rubin\n", "66 Silvio_Micali\n", "67 Makoto_Kobayashi_(physicist)\n", "68 Eric_Betzig\n", "69 Gordon_Tullock\n", "70 Galen_D._Stucky\n", "71 Lars_Peter_Hansen\n", "72 Andre_Geim\n", "73 Manjul_Bhargava\n", "74 Arthur_B._McDonald\n", "75 Jacques_Miller\n", "76 Charles_Lee_(scientist)\n", "77 Robert_S._Langer\n", "78 Alvin_E._Roth\n", "79 Pierre_Deligne\n", "80 Thomas_A._Steitz\n", "81 Erkki_Ruoslahti\n", "82 Robert_Tjian\n", "83 Emmanuelle_Charpentier\n", "84 Akira_Fujishima\n", "85 Randy_Schekman\n", "86 Shimon_Sakaguchi\n", "87 Morris_Chang\n", "88 William_Wootters\n", "89 Whitfield_Diffie\n", "90 Stephen_Ross_(economist)\n", "91 Peidong_Yang\n", "92 Robert_J._Shiller\n", "93 Jules_A._Hoffmann\n", "94 Andrew_Viterbi\n", "95 Rory_Collins\n", "96 John_L._Hennessy\n", "97 Fran%C3%A7ois_Englert\n", "98 Elon_Lindenstrauss\n", "99 Joshua_Angrist\n", "100 Stephen_J._Lippard\n", "101 Carol_W._Greider\n", "102 David_Card\n", "103 Ryoo_Ryong\n", "104 May-Britt_Moser\n", "105 David_Forbes_Hendry\n", "106 Peter_Zoller\n", "107 Philippe_Aghion\n", "108 Leslie_Valiant\n", "109 Brian_Druker\n", "110 M_Stanley_Whittingham\n", "111 Charles_M._Lieber\n", "112 Louis_E._Brus\n", "113 Stephen_W._Scherer\n", "114 
Martin_Feldstein\n", "115 Krzysztof_Matyjaszewski\n", "116 Hideo_Ohno\n", "117 Mark_Gertler_(economist)\n", "118 Alain_Aspect\n", "119 Martin_Karplus\n", "120 Alberto_Alesina\n", "121 John_Clauser\n", "122 Deborah_S._Jin\n", "123 Shafi_Goldwasser\n", "124 Willard_Boyle\n", "125 Michael_Berry_(physicist)\n", "126 Mildred_Dresselhaus\n", "127 Yakir_Aharonov\n", "128 Hiroshi_Amano\n", "129 Dennis_Slamon\n", "130 Serge_Haroche\n", "131 Gary_Ruvkun\n", "132 Michael_Levitt\n", "133 Yoshinori_Ohsumi\n", "134 Eugene_Fama\n", "135 Elinor_Ostrom\n", "136 Martin_Hellman\n", "137 Shuji_Nakamura\n", "138 Jeffrey_I._Gordon\n", "139 David_J._Wineland\n", "140 Paul_Krugman\n", "141 Eli_Yablonovitch\n", "142 Joseph_Altman\n", "143 Brian_Schmidt\n", "144 Dale_T._Mortensen\n", "145 Robert_Edwards_(physiologist)\n", "146 Aharon_Razin\n", "147 Leigh_Canham\n", "148 Anthony_Pawson\n", "149 Lloyd_Shapley\n", "150 Roger_Penrose\n", "151 Douglas_Diamond\n", "152 Yoichiro_Nambu\n", "153 John_Forbes_Nash_Jr.\n", "154 Halbert_White\n", "155 Jacqueline_Barton\n", "156 Michael_Stonebraker\n", "157 B._Jayant_Baliga\n", "158 Peter_Higgs\n", "159 Judea_Pearl\n", "160 John_Hardman_Moore\n", "161 Michael_Gr%C3%A4tzel\n", "162 Anton_Zeilinger\n", "163 Jean_Fr%C3%A9chet\n", "164 John_Pendry\n", "165 Charles_David_Allis\n", "166 Paul_L._Modrich\n", "167 Louis_Nirenberg\n", "168 Thomas_C._S%C3%BCdhof\n", "169 Peter_Walter\n", "170 Adam_Riess\n", "171 Thomas_J._Sargent\n", "172 Satoshi_%C5%8Cmura\n", "173 Oliver_E._Williamson\n", "174 Bruce_Ames\n", "175 Charles_K._Kao\n", "176 Stanislav_Smirnov\n", "177 David_Spergel\n", "178 Ada_Yonath\n", "179 Brian_Kobilka\n", "180 Ralph_M._Steinman\n", "181 Arieh_Warshel\n", "182 John_Gurdon\n", "183 Howard_Cedar\n", "184 Kazutoshi_Mori\n", "185 Douglas_L._Coleman\n", "186 Masatoshi_Takeichi\n", "187 Stephen_E._Harris\n", "188 Jerry_A._Hausman\n", "189 Angus_Deaton\n", "190 Chad_Mirkin\n", "191 Israel_Kirzner\n", "192 Maryam_Mirzakhani\n", "193 
Akira_Suzuki_(chemist)\n", "194 Tomas_Lindahl\n", "195 Charles_T._Kresge\n", "196 Nadrian_Seeman\n", "197 Jean_Tirole\n", "198 Lene_Hau\n", "199 Anthony_R._Hunter\n", "200 Winslow_Briggs\n", "201 Lyman_Page\n", "202 Carolyn_R._Bertozzi\n", "203 Richard_Posner\n", "204 Ernst_Fehr\n", "205 Yoshinori_Tokura\n", "206 William_E._Moerner\n", "207 Omar_M._Yaghi\n", "208 Robert_G._Roeder\n", "209 Osamu_Shimomura\n", "210 Laurens_W._Molenkamp\n", "211 Martin_Chalfie\n", "212 William_C._Campbell_(scientist)\n", "213 Richard_F._Heck\n", "214 Fran%C3%A7oise_Barr%C3%A9-Sinoussi\n", "215 Graeme_Moad\n", "216 James_Till\n", "217 Charles_P._Thacker\n", "218 G._David_Tilman\n", "219 Ei-ichi_Negishi\n", "220 Edvard_Moser\n", "221 Gordon_Moore\n", "222 Nicholas_Lydon\n", "223 Stefan_Hell\n", "224 Ramamoorthy_Ramesh\n", "225 Roger_Y._Tsien\n", "226 Elizabeth_Blackburn\n", "227 Didier_Queloz\n", "228 Peter_Diamond\n", "229 Sajeev_John\n", "230 Joseph_Felsenstein\n", "231 Mark_Granovetter\n", "232 Shoucheng_Zhang\n", "233 Charles_H._Bennett_(computer_scientist)\n", "234 Peter_Crane\n", "235 Christopher_A._Pissarides\n", "236 Adrian_Bird\n", "237 Artur_Avila\n", "238 Gilles_Brassard\n", "239 Richard_Blundell\n", "240 John_O%27Keefe_(neuroscientist)\n", "241 Venkatraman_Ramakrishnan\n", "242 Karl_Barry_Sharpless\n", "243 Gary_Schuster\n", "244 Michel_Mayor\n", "245 Michael_Wigler\n", "246 Luc_Montagnier\n", "247 James_E._Darnell\n", "248 Allen_J._Bard\n", "249 Robert_H._Dennard\n", "250 Dan_Shechtman\n", "251 Konstantin_Novoselov\n", "252 Graham_Hutchings\n", "253 Jennifer_Doudna\n", "254 Armen_Alchian\n", "255 Kevin_M._Murphy\n", "256 Ng%C3%B4_B%E1%BA%A3o_Ch%C3%A2u\n", "257 Juan_Ignacio_Cirac_Sasturain\n", "258 Richard_Peto\n", "259 Zhong_Lin_Wang\n", "260 Christopher_A._Sims\n", "261 Yakov_Sinai\n", "262 Toshihide_Maskawa\n", "\n", "RangeIndex: 63003 entries, 0 to 63002\n", "Data columns (total 12 columns):\n", "_content_model 63003 non-null object\n", "_parent_id 63003 non-null 
int64\n", "_sha1 62984 non-null object\n", "anon 63003 non-null bool\n", "article 63003 non-null object\n", "comment 63003 non-null object\n", "minor 63003 non-null bool\n", "revid 63003 non-null int64\n", "rollbacktoken 0 non-null object\n", "text 62984 non-null object\n", "timestamp 63003 non-null datetime64[ns]\n", "user 63003 non-null object\n", "dtypes: bool(2), datetime64[ns](1), int64(2), object(7)\n", "memory usage: 4.9+ MB\n" ] } ], "source": [
"# Download every revision (with its wikitext) of each awarded scientist's article.\n",
"list_rev2 = []    # one dict per revision, across all articles\n",
"erorr_list2 = []  # titles whose download raised an exception\n",
"\n",
"for j, sci_name in enumerate(sci_aw_dict, 1):\n",
"    name = sci_name.split(\"/\")[-1]  # last \"/\"-separated segment of the key is used as the page title\n",
"    print(\"%d %s\" % (j, name))\n",
"    try:\n",
"        page = pywikibot.Page(site, name)\n",
"        page_revs = []\n",
"        for rev in page.revisions(content=True):\n",
"            # Copy the attributes instead of adding \"article\" to rev.__dict__ itself,\n",
"            # which would modify the revision object.\n",
"            rev_dict = dict(vars(rev))\n",
"            rev_dict[\"article\"] = name\n",
"            page_revs.append(rev_dict)\n",
"    except Exception as err:\n",
"        # Exception rather than a bare except, so interrupting the kernel still stops the loop;\n",
"        # the error itself is printed so the failure can be diagnosed.\n",
"        print(\"Error when access: %s (%r)\" % (name, err))\n",
"        erorr_list2.append(name)\n",
"    else:\n",
"        # Keep an article's revisions only when its whole history was read without error.\n",
"        list_rev2.extend(page_revs)\n",
"\n",
"df2 = pd.DataFrame(list_rev2)\n",
"df2.info()\n",
"\n",
"# Collapse every run of whitespace in the wikitext to a single space; values that are\n",
"# not unicode strings (missing text) become None.\n",
"df2[\"text\"] = df2[\"text\"].apply(lambda x: \" \".join(x.split()) if isinstance(x, unicode) else None)\n",
"# Explicit encoding: non-ASCII text must not depend on the interpreter's default codec.\n",
"df2.to_csv(\"seed_revisions.csv\", sep=\"\\t\", encoding=\"utf-8\")" ] },
{ "cell_type": "code", "execution_count": 188, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[]\n" ] } ], "source": [ "print(erorr_list2)" ] },
{ "cell_type": "code", "execution_count": 198, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Number of TextBlob word tokens in each revision; None where the revision has no text.\n",
"df2[\"text_len\"] = df2[\"text\"].apply(lambda x: len(TextBlob(x).words) if isinstance(x, unicode) else None)" ] },
{ "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [], "source": [
"# Write the file again so that it also contains the text_len column.\n",
"df2.to_csv(\"seed_revisions.csv\", sep=\"\\t\", encoding=\"utf-8\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# convert wiki to plain text" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 156, "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", 
"output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"text_clean\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpreprocessor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\" \"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mleaves\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pandas\\core\\series.pyc\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, convert_dtype, args, **kwds)\u001b[0m\n\u001b[0;32m 2549\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2550\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masobject\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2551\u001b[1;33m \u001b[0mmapped\u001b[0m 
\u001b[1;33m=\u001b[0m \u001b[0mlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmap_infer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mconvert_dtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2552\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2553\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmapped\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmapped\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSeries\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mpandas/_libs/src/inference.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.map_infer\u001b[1;34m()\u001b[0m\n", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"text_clean\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpreprocessor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\" 
\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mleaves\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\parser.pyc\u001b[0m in \u001b[0;36mparse\u001b[1;34m(self, source)\u001b[0m\n\u001b[0;32m 172\u001b[0m \u001b[1;34m\"or invoke one of its pattern attributes.\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 173\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 174\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtopPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 175\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mparseTest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msource\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36mparse\u001b[1;34m(self, source)\u001b[0m\n\u001b[0;32m 254\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 255\u001b[0m \u001b[1;31m# parse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 256\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 257\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 258\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstop_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in 
\u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 1246\u001b[0m \u001b[1;31m# case success: append node to child sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1247\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1248\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1249\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1250\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m 
\u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 907\u001b[0m \u001b[1;31m# case success: append node to global value sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 908\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 909\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 910\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 911\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m 
\u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 1246\u001b[0m \u001b[1;31m# case success: append node to child sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1247\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1248\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1249\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1250\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m 
\u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 825\u001b[0m \u001b[1;31m# choice pattern transfos, avoid useless nesting.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 826\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 827\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 828\u001b[0m \u001b[1;31m# apply possible transformations stored on self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 829\u001b[0m \u001b[1;31m# (in addition to the ones on wrapped pattern)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 
"\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 825\u001b[0m \u001b[1;31m# choice pattern transfos, avoid useless nesting.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 826\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 827\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 828\u001b[0m \u001b[1;31m# apply possible transformations 
stored on self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 829\u001b[0m \u001b[1;31m# (in addition to the ones on wrapped pattern)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 1246\u001b[0m \u001b[1;31m# case success: append node to child sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1247\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1248\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1249\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1250\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, 
pos)\u001b[0m\n\u001b[0;32m 825\u001b[0m \u001b[1;31m# choice pattern transfos, avoid useless nesting.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 826\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 827\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 828\u001b[0m \u001b[1;31m# apply possible transformations stored on self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 829\u001b[0m \u001b[1;31m# (in addition to the ones on wrapped pattern)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m 
\u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 1246\u001b[0m \u001b[1;31m# case success: append node to child sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1247\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1248\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1249\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1250\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 907\u001b[0m \u001b[1;31m# case success: append node to global value sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 908\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 909\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 910\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 911\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m 
\u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 907\u001b[0m \u001b[1;31m# case success: append node to global value sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 908\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 909\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 910\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 911\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 
"\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 1605\u001b[0m \u001b[1;31m# simply check through wrapped pattern\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1606\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1607\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1608\u001b[0m 
\u001b[1;32mreturn\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1609\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mPijnuError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 1246\u001b[0m \u001b[1;31m# case success: append node to child sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1247\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1248\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1249\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1250\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, 
pos)\u001b[0m\n\u001b[0;32m 825\u001b[0m \u001b[1;31m# choice pattern transfos, avoid useless nesting.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 826\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 827\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 828\u001b[0m \u001b[1;31m# apply possible transformations stored on self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 829\u001b[0m \u001b[1;31m# (in addition to the ones on wrapped pattern)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m 
\u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 907\u001b[0m \u001b[1;31m# case success: append node to global value sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 908\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 909\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 910\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 911\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 825\u001b[0m \u001b[1;31m# choice pattern transfos, avoid useless nesting.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 826\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 827\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 828\u001b[0m \u001b[1;31m# apply possible transformations stored on self\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 829\u001b[0m \u001b[1;31m# (in addition to the ones on wrapped pattern)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m 
\u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 1246\u001b[0m \u001b[1;31m# case success: append node to child sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1247\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1248\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1249\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1250\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in 
\u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 907\u001b[0m \u001b[1;31m# case success: append node to global value sequence\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 908\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 909\u001b[1;33m \u001b[0mnode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_memoCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 910\u001b[0m \u001b[0mpos\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mnode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 911\u001b[0m \u001b[0mchildNodes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnode\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_memoCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 509\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDO_STATS\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mPattern\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstats\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchecks\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 511\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_realCheck\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 512\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mInvalidation\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 513\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\pattern.pyc\u001b[0m in \u001b[0;36m_realCheck\u001b[1;34m(self, source, pos)\u001b[0m\n\u001b[0;32m 1046\u001b[0m \u001b[1;31m# -- return nil node, keep pos unchanged\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1047\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mPijnuError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 
1048\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mNode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mNode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mNIL\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpos\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mpos\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1049\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1050\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_message\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\node.pyc\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, pattern, value, start, end, source)\u001b[0m\n\u001b[0;32m 174\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpattern\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpattern\u001b[0m \u001b[1;31m# this node's generator\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 175\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mform\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m \u001b[1;31m# initial value form -- case doActions\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 176\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdefineKind\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# LEAF / BRANCH\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 177\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 178\u001b[0m \u001b[1;31m# source\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Anaconda2\\lib\\site-packages\\pijnu\\library\\node.pyc\u001b[0m in \u001b[0;36mdefineKind\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 198\u001b[0m \u001b[1;31m# -- basically a string\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 199\u001b[0m 
\u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 200\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mNode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mLEAF\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 201\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 202\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mcleanBranch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [
 "# Requires: pip install mediawiki-parser\n",
 "# https://github.com/peter17/mediawiki-parser\n",
 "\n",
 "# Both modules export a factory named make_parser. Alias each import so the\n",
 "# second one cannot shadow the first; otherwise the preprocessor would be\n",
 "# built by the text-backend factory instead of the preprocessor factory.\n",
 "from mediawiki_parser.preprocessor import make_parser as make_preprocessor\n",
 "from mediawiki_parser.text import make_parser as make_text_parser\n",
 "\n",
 "templates = {}  # empty mapping handed to the preprocessor factory\n",
 "preprocessor = make_preprocessor(templates)\n",
 "parser = make_text_parser()\n",
 "\n",
 "\n",
 "def wikitext_to_plain(raw):\n",
 "    \"\"\"Run one wikitext string through the preprocessor, then the text parser.\n",
 "\n",
 "    Returns the parse result's value with surrounding whitespace stripped.\n",
 "    \"\"\"\n",
 "    preprocessed = preprocessor.parse(raw)\n",
 "    return parser.parse(preprocessed.leaves()).value.strip()\n",
 "\n",
 "\n",
 "# NOTE: the saved run of this cell was interrupted (KeyboardInterrupt) inside\n",
 "# the parser, so this step is presumably slow on the full frame.\n",
 "df[\"text_clean\"] = df.text.apply(wikitext_to_plain)"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.13" } }, "nbformat": 4, "nbformat_minor": 2 }