{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### multivariate exploration of the geometry corpus\n", "\n", "This is basically a raw exploration of the various info we can get from our corpus about Wikipedia pages linked by the [`List of geometry topics` page](http://en.wikipedia.org/wiki/List_of_geometry_topics?oldformat=true). The aim of this study is to build an indicator allowing us to order pages by difficulty level." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%run \"libraries.ipynb\"\n", "%config InlineBackend.figure_formats=['svg']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## data source preview" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | average word length | \n", "first revision | \n", "hidden users | \n", "length | \n", "page views | \n", "unique ip users | \n", "unique registered users | \n", "unique users | \n", "words | \n", "Pagerank pro 0.8 | \n", "... | \n", "nbcontributorsBot | \n", "nbcontributorsIP | \n", "nbcontributorsMembers | \n", "nbrevisions | \n", "nbrevisionsBot | \n", "nbrevisionsIP | \n", "nbrevisionsMembers | \n", "ns | \n", "pageid | \n", "quality | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
pagename | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2D computer graphics | \n", "5.912321 | \n", "2001-10-13T06:23:27Z | \n", "0 | \n", "24124 | \n", "930542 | \n", "83 | \n", "139 | \n", "222 | \n", "3490 | \n", "2.540488 | \n", "... | \n", "18 | \n", "78 | \n", "119 | \n", "370 | \n", "23 | \n", "124 | \n", "223 | \n", "0 | \n", "35248 | \n", "4 | \n", "
2D geometric model | \n", "5.786585 | \n", "2004-03-08T00:49:59Z | \n", "0 | \n", "1113 | \n", "155960 | \n", "13 | \n", "28 | \n", "41 | \n", "164 | \n", "1.846966 | \n", "... | \n", "3 | \n", "13 | \n", "24 | \n", "54 | \n", "3 | \n", "20 | \n", "31 | \n", "0 | \n", "511647 | \n", "1 | \n", "
3D computer graphics | \n", "6.360368 | \n", "2007-03-21T05:56:20Z | \n", "0 | \n", "8803 | \n", "3174611 | \n", "258 | \n", "240 | \n", "498 | \n", "1196 | \n", "3.665079 | \n", "... | \n", "28 | \n", "250 | \n", "211 | \n", "893 | \n", "65 | \n", "368 | \n", "460 | \n", "0 | \n", "10175073 | \n", "4 | \n", "
3D projection | \n", "5.907763 | \n", "2003-09-07T18:48:57Z | \n", "0 | \n", "8987 | \n", "1141382 | \n", "98 | \n", "113 | \n", "211 | \n", "1301 | \n", "3.430536 | \n", "... | \n", "16 | \n", "95 | \n", "96 | \n", "351 | \n", "25 | \n", "117 | \n", "209 | \n", "0 | \n", "313741 | \n", "2 | \n", "
3-sphere | \n", "5.071429 | \n", "2002-02-19T11:12:25Z | \n", "0 | \n", "17595 | \n", "488222 | \n", "44 | \n", "117 | \n", "161 | \n", "2898 | \n", "3.693437 | \n", "... | \n", "12 | \n", "44 | \n", "103 | \n", "277 | \n", "13 | \n", "59 | \n", "205 | \n", "0 | \n", "39792 | \n", "4 | \n", "
Absolute geometry | \n", "5.561167 | \n", "2004-06-02T19:31:39Z | \n", "0 | \n", "5846 | \n", "77451 | \n", "22 | \n", "59 | \n", "81 | \n", "891 | \n", "2.870685 | \n", "... | \n", "14 | \n", "20 | \n", "44 | \n", "104 | \n", "15 | \n", "27 | \n", "62 | \n", "0 | \n", "699294 | \n", "2 | \n", "
Acute and obtuse triangles | \n", "5.818387 | \n", "2014-10-10T19:11:27Z | \n", "0 | \n", "9048 | \n", "5549 | \n", "1 | \n", "2 | \n", "3 | \n", "1327 | \n", "0.829064 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "44076423 | \n", "NaN | \n", "
Affine geometry | \n", "5.424357 | \n", "2003-06-11T09:28:43Z | \n", "0 | \n", "15245 | \n", "277910 | \n", "30 | \n", "82 | \n", "112 | \n", "2373 | \n", "3.495030 | \n", "... | \n", "15 | \n", "30 | \n", "64 | \n", "205 | \n", "22 | \n", "40 | \n", "143 | \n", "0 | \n", "243890 | \n", "3 | \n", "
Affine space | \n", "5.325827 | \n", "2003-08-18T04:32:19Z | \n", "0 | \n", "13202 | \n", "401482 | \n", "64 | \n", "105 | \n", "169 | \n", "2087 | \n", "4.010550 | \n", "... | \n", "18 | \n", "61 | \n", "84 | \n", "384 | \n", "30 | \n", "91 | \n", "263 | \n", "0 | \n", "298834 | \n", "2 | \n", "
Affine transformation | \n", "5.599894 | \n", "2002-02-25T15:51:15Z | \n", "0 | \n", "12454 | \n", "1333126 | \n", "78 | \n", "139 | \n", "217 | \n", "1887 | \n", "4.072621 | \n", "... | \n", "21 | \n", "76 | \n", "111 | \n", "371 | \n", "26 | \n", "107 | \n", "238 | \n", "0 | \n", "38449 | \n", "2 | \n", "
10 rows × 32 columns
\n", "