{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Parallel Analysis on PCA" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Parallel Analysis: Iteration 0\n", "Parallel Analysis: Iteration 1\n", "Parallel Analysis: Iteration 2\n", "Parallel Analysis: Iteration 3\n", "Parallel Analysis: Iteration 4\n", "Parallel Analysis: Iteration 5\n", "Parallel Analysis: Iteration 6\n", "Parallel Analysis: Iteration 7\n", "Parallel Analysis: Iteration 8\n", "Parallel Analysis: Iteration 9\n", "Parallel Analysis: Iteration 10\n", "Parallel Analysis: Iteration 11\n", "Parallel Analysis: Iteration 12\n", "Parallel Analysis: Iteration 13\n", "Parallel Analysis: Iteration 14\n", "Parallel Analysis: Iteration 15\n", "Parallel Analysis: Iteration 16\n", "Parallel Analysis: Iteration 17\n", "Parallel Analysis: Iteration 18\n", "Parallel Analysis: Iteration 19\n", "Parallel Analysis: Iteration 20\n", "Parallel Analysis: Iteration 21\n", "Parallel Analysis: Iteration 22\n", "Parallel Analysis: Iteration 23\n", "Parallel Analysis: Iteration 24\n", "Parallel Analysis: Iteration 25\n", "Parallel Analysis: Iteration 26\n", "Parallel Analysis: Iteration 27\n", "Parallel Analysis: Iteration 28\n", "Parallel Analysis: Iteration 29\n", "Parallel Analysis: Iteration 30\n", "Parallel Analysis: Iteration 31\n", "Parallel Analysis: Iteration 32\n", "Parallel Analysis: Iteration 33\n", "Parallel Analysis: Iteration 34\n", "Parallel Analysis: Iteration 35\n", "Parallel Analysis: Iteration 36\n", "Parallel Analysis: Iteration 37\n", "Parallel Analysis: Iteration 38\n", "Parallel Analysis: Iteration 39\n", "Parallel Analysis: Iteration 40\n", "Parallel Analysis: Iteration 41\n", "Parallel Analysis: Iteration 42\n", "Parallel Analysis: Iteration 43\n", "Parallel Analysis: Iteration 44\n", "Parallel Analysis: Iteration 45\n", "Parallel Analysis: Iteration 46\n", "Parallel Analysis: Iteration 47\n", "Parallel Analysis: Iteration 48\n", "Parallel Analysis: Iteration 49\n", "Analysis 2015-06-04 12:37:28 +0530\n", "= Statsample::Factor::ParallelAnalysis\n", " There are 3 real factors on data\n", " == Principal Component Analysis\n", " Number of factors: 7\n", " Communalities\n", "+----------+---------+------------+--------+\n", "| Variable | Initial | Extraction | % |\n", "+----------+---------+------------+--------+\n", "| v0 | 1.000 | 0.598 | 59.807 |\n", "| v1 | 1.000 | 0.801 | 80.128 |\n", "| v10 | 1.000 | 0.569 | 56.925 |\n", "| v11 | 1.000 | 0.677 | 67.746 |\n", "| v12 | 1.000 | 0.439 | 43.904 |\n", "| v13 | 1.000 | 0.727 | 72.661 |\n", "| v14 | 1.000 | 0.552 | 55.151 |\n", "| v15 | 1.000 | 0.678 | 67.813 |\n", "| v16 | 1.000 | 0.624 | 62.360 |\n", "| v17 | 1.000 | 0.604 | 60.404 |\n", "| v18 | 1.000 | 0.624 | 62.436 |\n", "| v19 | 1.000 | 0.754 | 75.400 |\n", "| v2 | 1.000 | 0.731 | 73.064 |\n", "| v20 | 1.000 | 0.773 | 77.278 |\n", "| v21 | 1.000 | 0.821 | 82.106 |\n", "| v22 | 1.000 | 0.820 | 82.046 |\n", "| v23 | 1.000 | 0.923 | 92.273 |\n", "| v24 | 1.000 | 0.941 | 94.130 |\n", "| v25 | 1.000 | 0.930 | 92.954 |\n", "| v26 | 1.000 | 0.953 | 95.287 |\n", "| v27 | 1.000 | 0.978 | 97.808 |\n", "| v28 | 1.000 | 0.979 | 97.869 |\n", "| v29 | 1.000 | 0.979 | 97.871 |\n", "| v3 | 1.000 | 0.584 | 58.402 |\n", "| v4 | 1.000 | 0.740 | 74.035 |\n", "| v5 | 1.000 | 0.742 | 74.217 |\n", "| v6 | 1.000 | 0.673 | 67.334 |\n", "| v7 | 1.000 | 0.549 | 54.927 |\n", "| v8 | 1.000 | 0.411 | 41.079 |\n", "| v9 | 1.000 | 0.705 | 70.541 |\n", "+----------+---------+------------+--------+\n", "\n", " Total Variance Explained\n", "+--------------+---------+---------+---------+\n", "| Component | E.Total | % | Cum. % |\n", "+--------------+---------+---------+---------+\n", "| Component 1 | 12.649 | 42.163% | 42.163 |\n", "| Component 2 | 2.835 | 9.451% | 51.613 |\n", "| Component 3 | 1.626 | 5.421% | 57.035 |\n", "| Component 4 | 1.349 | 4.497% | 61.532 |\n", "| Component 5 | 1.216 | 4.054% | 65.586 |\n", "| Component 6 | 1.119 | 3.730% | 69.316 |\n", "| Component 7 | 1.085 | 3.616% | 72.932 |\n", "| Component 8 | 0.980 | 3.268% | 76.200 |\n", "| Component 9 | 0.824 | 2.747% | 78.947 |\n", "| Component 10 | 0.785 | 2.618% | 81.565 |\n", "| Component 11 | 0.725 | 2.416% | 83.981 |\n", "| Component 12 | 0.699 | 2.330% | 86.311 |\n", "| Component 13 | 0.651 | 2.169% | 88.480 |\n", "| Component 14 | 0.538 | 1.792% | 90.272 |\n", "| Component 15 | 0.457 | 1.524% | 91.797 |\n", "| Component 16 | 0.423 | 1.410% | 93.207 |\n", "| Component 17 | 0.402 | 1.339% | 94.547 |\n", "| Component 18 | 0.321 | 1.068% | 95.615 |\n", "| Component 19 | 0.295 | 0.984% | 96.599 |\n", "| Component 20 | 0.240 | 0.800% | 97.398 |\n", "| Component 21 | 0.222 | 0.740% | 98.138 |\n", "| Component 22 | 0.185 | 0.616% | 98.754 |\n", "| Component 23 | 0.107 | 0.356% | 99.110 |\n", "| Component 24 | 0.098 | 0.326% | 99.436 |\n", "| Component 25 | 0.057 | 0.189% | 99.625 |\n", "| Component 26 | 0.052 | 0.173% | 99.798 |\n", "| Component 27 | 0.033 | 0.110% | 99.908 |\n", "| Component 28 | 0.019 | 0.065% | 99.972 |\n", "| Component 29 | 0.006 | 0.021% | 99.993 |\n", "| Component 30 | 0.002 | 0.007% | 100.000 |\n", "+--------------+---------+---------+---------+\n", "\n", " Component matrix\n", "+-----+-------+-------+-------+-------+-------+-------+-------+\n", "| | PC_1 | PC_2 | PC_3 | PC_4 | PC_5 | PC_6 | PC_7 |\n", "+-----+-------+-------+-------+-------+-------+-------+-------+\n", "| v0 | .008 | .642 | .122 | -.027 | .261 | .312 | .066 |\n", "| v1 | -.145 | .101 | -.479 | -.279 | .520 | -.385 | -.210 |\n", "| v10 | .410 | .323 | .295 | -.149 | .238 | -.013 | -.362 |\n", "| v11 | .308 | .034 | -.299 | -.122 | .331 | .460 | .394 |\n", "| v12 | .482 | -.121 | -.192 | .110 | -.067 | .209 | -.308 |\n", "| v13 | .448 | .382 | -.264 | .412 | .057 | -.366 | -.060 |\n", "| v14 | .519 | .288 | -.127 | -.355 | -.198 | .028 | -.130 |\n", "| v15 | .624 | .338 | .206 | .161 | -.143 | .012 | -.291 |\n", "| v16 | .707 | .020 | .159 | .291 | .058 | .065 | .080 |\n", "| v17 | .721 | -.110 | -.249 | -.042 | -.006 | -.044 | -.082 |\n", "| v18 | .765 | -.006 | .122 | -.102 | .078 | .035 | .076 |\n", "| v19 | .820 | -.027 | -.143 | -.059 | -.023 | .099 | -.214 |\n", "| v2 | .131 | .703 | .007 | .085 | -.179 | .420 | .059 |\n", "| v20 | .835 | .014 | .043 | -.044 | -.038 | .032 | .262 |\n", "| v21 | .883 | -.072 | .032 | -.038 | .064 | -.012 | .174 |\n", "| v22 | .898 | -.041 | .018 | .061 | .058 | -.075 | .012 |\n", "| v23 | .946 | -.086 | .036 | -.097 | .014 | -.041 | .083 |\n", "| v24 | .964 | -.065 | .002 | .048 | .011 | -.058 | .040 |\n", "| v25 | .956 | -.048 | -.031 | .009 | -.044 | -.090 | .044 |\n", "| v26 | .965 | -.126 | .024 | -.045 | .031 | .017 | .038 |\n", "| v27 | .974 | -.136 | .036 | -.058 | -.027 | -.071 | .034 |\n", "| v28 | .974 | -.139 | .052 | -.045 | .012 | -.058 | .038 |\n", "| v29 | .975 | -.145 | .047 | -.037 | .002 | -.057 | .033 |\n", "| v3 | -.090 | -.161 | -.687 | .065 | .190 | .140 | .135 |\n", "| v4 | -.072 | .734 | -.055 | -.273 | .039 | -.260 | .222 |\n", "| v5 | .066 | .463 | .128 | .552 | .300 | -.210 | .261 |\n", "| v6 | .017 | .478 | -.203 | -.068 | -.515 | -.283 | .232 |\n", "| v7 | .245 | .527 | .061 | -.293 | .189 | .098 | -.276 |\n", "| v8 | .167 | .185 | -.394 | -.207 | -.383 | .030 | .058 |\n", "| v9 | .267 | .161 | -.463 | .497 | -.118 | .186 | -.313 |\n", "+-----+-------+-------+-------+-------+-------+-------+-------+\n", "\n", " Traditional Kaiser criterion (k>1) returns 7 factors\n", " == Parallel Analysis\n", " Bootstrap Method: random\n", " Uses SMC: No\n", " Correlation Matrix type : correlation_matrix\n", " Number of variables: 30\n", " Number of cases: 150\n", " Number of iterations: 50\n", " Number or factors to preserve: 2\n", " Eigenvalues\n", "+----+-----------------+----------------------+--------+-----------+\n", "| n | data eigenvalue | generated eigenvalue | p.95 | preserve? |\n", "+----+-----------------+----------------------+--------+-----------+\n", "| 1 | 12.6488 | 1.9482 | 2.0426 | Yes |\n", "| 2 | 2.8352 | 1.8029 | 1.8892 | Yes |\n", "| 3 | 1.6264 | 1.7055 | 1.8083 | |\n", "| 4 | 1.3492 | 1.6212 | 1.7078 | |\n", "| 5 | 1.2162 | 1.5343 | 1.6195 | |\n", "| 6 | 1.1190 | 1.4597 | 1.5586 | |\n", "| 7 | 1.0847 | 1.3873 | 1.4633 | |\n", "| 8 | 0.9804 | 1.3198 | 1.3579 | |\n", "| 9 | 0.8242 | 1.2639 | 1.3108 | |\n", "| 10 | 0.7853 | 1.2097 | 1.2580 | |\n", "| 11 | 0.7248 | 1.1571 | 1.2002 | |\n", "| 12 | 0.6991 | 1.1072 | 1.1427 | |\n", "| 13 | 0.6506 | 1.0566 | 1.0902 | |\n", "| 14 | 0.5376 | 1.0097 | 1.0522 | |\n", "| 15 | 0.4573 | 0.9611 | 1.0041 | |\n", "| 16 | 0.4231 | 0.9127 | 0.9611 | |\n", "| 17 | 0.4018 | 0.8725 | 0.9004 | |\n", "| 18 | 0.3205 | 0.8256 | 0.8674 | |\n", "| 19 | 0.2951 | 0.7902 | 0.8363 | |\n", "| 20 | 0.2399 | 0.7452 | 0.7848 | |\n", "| 21 | 0.2219 | 0.7063 | 0.7378 | |\n", "| 22 | 0.1849 | 0.6680 | 0.7120 | |\n", "| 23 | 0.1067 | 0.6306 | 0.6696 | |\n", "| 24 | 0.0978 | 0.5933 | 0.6302 | |\n", "| 25 | 0.0566 | 0.5507 | 0.5993 | |\n", "| 26 | 0.0520 | 0.5155 | 0.5522 | |\n", "| 27 | 0.0329 | 0.4733 | 0.5060 | |\n", "| 28 | 0.0194 | 0.4336 | 0.4700 | |\n", "| 29 | 0.0063 | 0.3954 | 0.4309 | |\n", "| 30 | 0.0020 | 0.3425 | 0.3953 | |\n", "+----+-----------------+----------------------+--------+-----------+\n", "\n", " Parallel Analysis returns 2 factors to preserve\n", "\n" ] } ], "source": [ "require 'statsample'\n", "samples=150\n", "variables=30\n", "iterations=50\n", "Statsample::Analysis.store(Statsample::Factor::ParallelAnalysis) do\n", " Daru.lazy_update = true\n", " \n", " rng = Distribution::Normal.rng()\n", " f1 = rnorm(samples)\n", " f2 = rnorm(samples)\n", " f3 = rnorm(samples)\n", "\n", " vectors={}\n", "\n", " variables.times do |i|\n", " vectors[\"v#{i}\".to_sym] = Daru::Vector.new(samples.times.collect {|nv| f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.call})\n", " vectors[\"v#{i}\".to_sym].rename \"Vector #{i}\"\n", " end\n", "\n", " ds = Daru::DataFrame.new(vectors)\n", "\n", " pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>iterations, :debug=>true)\n", " pca=pca(cor(ds))\n", " echo \"There are 3 real factors on data\"\n", " summary pca\n", " echo \"Traditional Kaiser criterion (k>1) returns #{pca.m} factors\"\n", " summary pa\n", " echo \"Parallel Analysis returns #{pa.number_of_factors} factors to preserve\"\n", " Daru.lazy_update = false\n", "end\n", "\n", "Statsample::Analysis.run_batch\n" ] } ], "metadata": { "kernelspec": { "display_name": "Ruby 2.2.1", "language": "ruby", "name": "ruby" }, "language_info": { "file_extension": "rb", "mimetype": "application/x-ruby", "name": "ruby", "version": "2.2.1" } }, "nbformat": 4, "nbformat_minor": 0 }