{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Parallel Analysis on PCA" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "application/javascript": [ "if(window['d3'] === undefined ||\n", " window['Nyaplot'] === undefined){\n", " var path = {\"d3\":\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\",\"downloadable\":\"https://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\"};\n", "\n", "\n", "\n", " var shim = {\"d3\":{\"exports\":\"d3\"},\"downloadable\":{\"exports\":\"downloadable\"}};\n", "\n", " require.config({paths: path, shim:shim});\n", "\n", "\n", "require(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\n", "\n", "\tvar script = d3.select(\"head\")\n", "\t .append(\"script\")\n", "\t .attr(\"src\", \"https://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\")\n", "\t .attr(\"async\", true);\n", "\n", "\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\n", "\n", "\n", "\t var event = document.createEvent(\"HTMLEvents\");\n", "\t event.initEvent(\"load_nyaplot\",false,false);\n", "\t window.dispatchEvent(event);\n", "\t console.log('Finished loading Nyaplotjs');\n", "\n", "\t};\n", "\n", "\n", "});});\n", "}\n" ], "text/plain": [ "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\\\",\\\"downloadable\\\":\\\"https://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"},\\\"downloadable\\\":{\\\"exports\\\":\\\"downloadable\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"https://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});});\\n}\\n\"" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Parallel Analysis: Iteration 0\n", "Parallel Analysis: Iteration 1\n", "Parallel Analysis: Iteration 2\n", "Parallel Analysis: Iteration 3\n", "Parallel Analysis: Iteration 4\n", "Parallel Analysis: Iteration 5\n", "Parallel Analysis: Iteration 6\n", "Parallel Analysis: Iteration 7\n", "Parallel Analysis: Iteration 8\n", "Parallel Analysis: Iteration 9\n", "Parallel Analysis: Iteration 10\n", "Parallel Analysis: Iteration 11\n", "Parallel Analysis: Iteration 12\n", "Parallel Analysis: Iteration 13\n", "Parallel Analysis: Iteration 14\n", "Parallel Analysis: Iteration 15\n", "Parallel Analysis: Iteration 16\n", "Parallel Analysis: Iteration 17\n", "Parallel Analysis: Iteration 18\n", "Parallel Analysis: Iteration 19\n", "Parallel Analysis: Iteration 20\n", "Parallel Analysis: Iteration 21\n", "Parallel Analysis: Iteration 22\n", "Parallel Analysis: Iteration 23\n", "Parallel Analysis: Iteration 24\n", "Parallel Analysis: Iteration 25\n", "Parallel Analysis: Iteration 26\n", "Parallel Analysis: Iteration 27\n", "Parallel Analysis: Iteration 28\n", "Parallel Analysis: Iteration 29\n", "Parallel Analysis: Iteration 30\n", "Parallel Analysis: Iteration 31\n", "Parallel Analysis: Iteration 32\n", "Parallel Analysis: Iteration 33\n", "Parallel Analysis: Iteration 34\n", "Parallel Analysis: Iteration 35\n", "Parallel Analysis: Iteration 36\n", "Parallel Analysis: Iteration 37\n", "Parallel Analysis: Iteration 38\n", "Parallel Analysis: Iteration 39\n", "Parallel Analysis: Iteration 40\n", "Parallel Analysis: Iteration 41\n", "Parallel Analysis: Iteration 42\n", "Parallel Analysis: Iteration 43\n", "Parallel Analysis: Iteration 44\n", "Parallel Analysis: Iteration 45\n", "Parallel Analysis: Iteration 46\n", "Parallel Analysis: Iteration 47\n", "Parallel Analysis: Iteration 48\n", "Parallel Analysis: Iteration 49\n", "Analysis 2016-03-26 02:42:37 +0000\n", "= Statsample::Factor::ParallelAnalysis\n", " There are 3 real factors on data\n", " == Principal Component Analysis\n", " Number of factors: 8\n", " Communalities\n", "+----------+---------+------------+--------+\n", "| Variable | Initial | Extraction | % |\n", "+----------+---------+------------+--------+\n", "| v0 | 1.000 | 0.718 | 71.814 |\n", "| v1 | 1.000 | 0.806 | 80.592 |\n", "| v10 | 1.000 | 0.742 | 74.220 |\n", "| v11 | 1.000 | 0.672 | 67.184 |\n", "| v12 | 1.000 | 0.767 | 76.672 |\n", "| v13 | 1.000 | 0.525 | 52.483 |\n", "| v14 | 1.000 | 0.613 | 61.319 |\n", "| v15 | 1.000 | 0.767 | 76.689 |\n", "| v16 | 1.000 | 0.580 | 58.006 |\n", "| v17 | 1.000 | 0.614 | 61.435 |\n", "| v18 | 1.000 | 0.571 | 57.060 |\n", "| v19 | 1.000 | 0.606 | 60.624 |\n", "| v2 | 1.000 | 0.745 | 74.486 |\n", "| v20 | 1.000 | 0.735 | 73.461 |\n", "| v21 | 1.000 | 0.835 | 83.501 |\n", "| v22 | 1.000 | 0.874 | 87.361 |\n", "| v23 | 1.000 | 0.830 | 82.958 |\n", "| v24 | 1.000 | 0.900 | 90.029 |\n", "| v25 | 1.000 | 0.930 | 93.029 |\n", "| v26 | 1.000 | 0.940 | 94.045 |\n", "| v27 | 1.000 | 0.957 | 95.748 |\n", "| v28 | 1.000 | 0.975 | 97.489 |\n", "| v29 | 1.000 | 0.979 | 97.931 |\n", "| v3 | 1.000 | 0.675 | 67.468 |\n", "| v4 | 1.000 | 0.676 | 67.614 |\n", "| v5 | 1.000 | 0.639 | 63.899 |\n", "| v6 | 1.000 | 0.707 | 70.699 |\n", "| v7 | 1.000 | 0.702 | 70.152 |\n", "| v8 | 1.000 | 0.593 | 59.331 |\n", "| v9 | 1.000 | 0.774 | 77.365 |\n", "+----------+---------+------------+--------+\n", "\n", " Total Variance Explained\n", "+--------------+---------+---------+---------+\n", "| Component | E.Total | % | Cum. % |\n", "+--------------+---------+---------+---------+\n", "| Component 1 | 11.635 | 38.784% | 38.784 |\n", "| Component 2 | 2.228 | 7.425% | 46.209 |\n", "| Component 3 | 1.868 | 6.225% | 52.434 |\n", "| Component 4 | 1.781 | 5.936% | 58.371 |\n", "| Component 5 | 1.503 | 5.009% | 63.380 |\n", "| Component 6 | 1.275 | 4.250% | 67.630 |\n", "| Component 7 | 1.149 | 3.830% | 71.460 |\n", "| Component 8 | 1.009 | 3.362% | 74.822 |\n", "| Component 9 | 0.948 | 3.162% | 77.984 |\n", "| Component 10 | 0.813 | 2.709% | 80.692 |\n", "| Component 11 | 0.776 | 2.585% | 83.278 |\n", "| Component 12 | 0.688 | 2.292% | 85.570 |\n", "| Component 13 | 0.584 | 1.945% | 87.515 |\n", "| Component 14 | 0.516 | 1.719% | 89.235 |\n", "| Component 15 | 0.490 | 1.633% | 90.868 |\n", "| Component 16 | 0.454 | 1.512% | 92.380 |\n", "| Component 17 | 0.416 | 1.388% | 93.768 |\n", "| Component 18 | 0.345 | 1.149% | 94.916 |\n", "| Component 19 | 0.322 | 1.073% | 95.990 |\n", "| Component 20 | 0.276 | 0.919% | 96.908 |\n", "| Component 21 | 0.240 | 0.800% | 97.708 |\n", "| Component 22 | 0.206 | 0.685% | 98.394 |\n", "| Component 23 | 0.132 | 0.439% | 98.832 |\n", "| Component 24 | 0.098 | 0.327% | 99.159 |\n", "| Component 25 | 0.095 | 0.315% | 99.475 |\n", "| Component 26 | 0.064 | 0.215% | 99.690 |\n", "| Component 27 | 0.050 | 0.167% | 99.857 |\n", "| Component 28 | 0.030 | 0.100% | 99.957 |\n", "| Component 29 | 0.010 | 0.034% | 99.990 |\n", "| Component 30 | 0.003 | 0.010% | 100.000 |\n", "+--------------+---------+---------+---------+\n", "\n", " Component matrix\n", "+-----+-------+-------+-------+-------+-------+-------+-------+-------+\n", "| | PC_1 | PC_2 | PC_3 | PC_4 | PC_5 | PC_6 | PC_7 | PC_8 |\n", "+-----+-------+-------+-------+-------+-------+-------+-------+-------+\n", "| v0 | .029 | .599 | .211 | -.365 | -.017 | -.128 | -.379 | .145 |\n", "| v1 | -.011 | .431 | .139 | .104 | -.148 | .729 | -.163 | -.096 |\n", "| v10 | -.309 | .211 | -.302 | -.367 | -.453 | .220 | .102 | .335 |\n", "| v11 | -.386 | .373 | .258 | .394 | -.326 | -.091 | -.001 | .217 |\n", "| v12 | -.351 | .063 | .544 | -.128 | .016 | .019 | .567 | -.070 |\n", "| v13 | -.450 | .432 | .243 | .120 | .114 | -.130 | -.172 | -.046 |\n", "| v14 | -.539 | -.069 | .166 | .273 | .024 | .395 | .018 | .242 |\n", "| v15 | -.576 | -.032 | -.210 | .192 | -.494 | .202 | .215 | -.147 |\n", "| v16 | -.546 | -.378 | .264 | -.110 | .074 | -.006 | .060 | .218 |\n", "| v17 | -.627 | .258 | .005 | .151 | .093 | .131 | -.271 | -.181 |\n", "| v18 | -.716 | -.171 | -.084 | .120 | .020 | .049 | -.050 | .045 |\n", "| v19 | -.718 | .088 | -.063 | .054 | -.249 | .111 | .007 | .047 |\n", "| v2 | -.064 | -.591 | .400 | .197 | -.086 | .126 | -.361 | -.199 |\n", "| v20 | -.826 | -.133 | .017 | .069 | .132 | .099 | -.008 | -.053 |\n", "| v21 | -.882 | -.147 | .001 | -.034 | .014 | .091 | -.141 | -.081 |\n", "| v22 | -.904 | .033 | -.012 | -.147 | .040 | -.149 | -.059 | .078 |\n", "| v23 | -.890 | .023 | .109 | -.112 | .085 | -.012 | .041 | -.056 |\n", "| v24 | -.924 | .125 | -.011 | -.025 | .097 | -.089 | .101 | .054 |\n", "| v25 | -.937 | .054 | -.084 | -.060 | .083 | -.176 | -.006 | -.025 |\n", "| v26 | -.961 | -.054 | -.052 | -.028 | .076 | -.061 | .006 | -.022 |\n", "| v27 | -.965 | -.016 | -.070 | .006 | .122 | -.048 | .016 | -.058 |\n", "| v28 | -.970 | .015 | -.106 | -.009 | .126 | -.070 | .029 | -.012 |\n", "| v29 | -.974 | -.007 | -.108 | .002 | .117 | -.057 | .004 | -.035 |\n", "| v3 | -.036 | .422 | .209 | -.062 | -.345 | -.231 | .368 | -.374 |\n", "| v4 | -.107 | -.546 | .428 | -.164 | -.320 | .069 | .046 | -.217 |\n", "| v5 | .010 | .055 | .656 | .203 | .004 | -.067 | .015 | .400 |\n", "| v6 | .078 | .131 | .503 | -.612 | .130 | .076 | -.140 | -.115 |\n", "| v7 | -.091 | -.236 | .063 | .227 | -.528 | -.439 | -.230 | .240 |\n", "| v8 | -.318 | .010 | -.048 | -.248 | -.445 | -.162 | -.337 | -.300 |\n", "| v9 | -.203 | -.302 | -.115 | -.713 | -.142 | .164 | .011 | .269 |\n", "+-----+-------+-------+-------+-------+-------+-------+-------+-------+\n", "\n", " Traditional Kaiser criterion (k>1) returns 8 factors\n", " == Parallel Analysis\n", " Bootstrap Method: random\n", " Uses SMC: No\n", " Correlation Matrix type : correlation_matrix\n", " Number of variables: 30\n", " Number of cases: 150\n", " Number of iterations: 50\n", " Number or factors to preserve: 4\n", " Eigenvalues\n", "+----+-----------------+----------------------+--------+-----------+\n", "| n | data eigenvalue | generated eigenvalue | p.95 | preserve? |\n", "+----+-----------------+----------------------+--------+-----------+\n", "| 1 | 11.6353 | 1.9397 | 2.0744 | Yes |\n", "| 2 | 2.2275 | 1.7961 | 1.8770 | Yes |\n", "| 3 | 1.8675 | 1.6885 | 1.7637 | Yes |\n", "| 4 | 1.7809 | 1.6032 | 1.6780 | Yes |\n", "| 5 | 1.5027 | 1.5281 | 1.5856 | |\n", "| 6 | 1.2750 | 1.4573 | 1.5253 | |\n", "| 7 | 1.1491 | 1.3892 | 1.4417 | |\n", "| 8 | 1.0086 | 1.3263 | 1.3981 | |\n", "| 9 | 0.9485 | 1.2711 | 1.3060 | |\n", "| 10 | 0.8126 | 1.2174 | 1.2501 | |\n", "| 11 | 0.7756 | 1.1585 | 1.2080 | |\n", "| 12 | 0.6877 | 1.1053 | 1.1429 | |\n", "| 13 | 0.5836 | 1.0553 | 1.0955 | |\n", "| 14 | 0.5158 | 1.0083 | 1.0468 | |\n", "| 15 | 0.4899 | 0.9635 | 1.0054 | |\n", "| 16 | 0.4537 | 0.9161 | 0.9573 | |\n", "| 17 | 0.4164 | 0.8739 | 0.9153 | |\n", "| 18 | 0.3446 | 0.8270 | 0.8561 | |\n", "| 19 | 0.3220 | 0.7909 | 0.8310 | |\n", "| 20 | 0.2756 | 0.7524 | 0.7944 | |\n", "| 21 | 0.2400 | 0.7086 | 0.7465 | |\n", "| 22 | 0.2056 | 0.6678 | 0.7227 | |\n", "| 23 | 0.1316 | 0.6309 | 0.6700 | |\n", "| 24 | 0.0982 | 0.5889 | 0.6263 | |\n", "| 25 | 0.0946 | 0.5534 | 0.5885 | |\n", "| 26 | 0.0645 | 0.5173 | 0.5562 | |\n", "| 27 | 0.0501 | 0.4795 | 0.5195 | |\n", "| 28 | 0.0300 | 0.4405 | 0.4791 | |\n", "| 29 | 0.0101 | 0.3978 | 0.4305 | |\n", "| 30 | 0.0029 | 0.3470 | 0.3938 | |\n", "+----+-----------------+----------------------+--------+-----------+\n", "\n", " Parallel Analysis returns 4 factors to preserve\n", "\n" ] } ], "source": [ "require 'statsample'\n", "samples=150\n", "variables=30\n", "iterations=50\n", "Statsample::Analysis.store(Statsample::Factor::ParallelAnalysis) do\n", " Daru.lazy_update = true\n", " \n", " rng = Distribution::Normal.rng()\n", " f1 = rnorm(samples)\n", " f2 = rnorm(samples)\n", " f3 = rnorm(samples)\n", "\n", " vectors={}\n", "\n", " variables.times do |i|\n", " vectors[\"v#{i}\".to_sym] = Daru::Vector.new(samples.times.collect {|nv| f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.call})\n", " vectors[\"v#{i}\".to_sym].rename \"Vector #{i}\"\n", " end\n", "\n", " ds = Daru::DataFrame.new(vectors)\n", "\n", " pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>iterations, :debug=>true)\n", " pca=pca(cor(ds))\n", " echo \"There are 3 real factors on data\"\n", " summary pca\n", " echo \"Traditional Kaiser criterion (k>1) returns #{pca.m} factors\"\n", " summary pa\n", " echo \"Parallel Analysis returns #{pa.number_of_factors} factors to preserve\"\n", " Daru.lazy_update = false\n", "end\n", "\n", "Statsample::Analysis.run_batch\n" ] } ], "metadata": { "kernelspec": { "display_name": "Ruby 2.2.1", "language": "ruby", "name": "ruby" }, "language_info": { "file_extension": ".rb", "mimetype": "application/x-ruby", "name": "ruby", "version": "2.2.1" } }, "nbformat": 4, "nbformat_minor": 0 }