In [11]:
# Import required libraries
import pandas as pd
from sklearn.datasets import load_iris
from factor_analyzer import FactorAnalyzer
import matplotlib.pyplot as plt
from matplotlib import style
import matplotlib
# Notebook-wide plotting defaults: 10 x 6 figures ...
matplotlib.rcParams.update({'figure.figsize': (10.0, 6.0)})

# ... drawn with the 'ggplot' style sheet.
style.use('ggplot')
In [12]:
# Read the survey file and build the analysis frame in a single chain:
# remove the unnamed leading column and the gender/education/age fields,
# then discard every row that still contains a missing value.
df = (
    pd.read_csv("bfi.csv")
    .drop(['Unnamed: 0', 'gender', 'education', 'age'], axis=1)
    .dropna()
)
In [13]:
# Preview the first rows of the cleaned frame (the output reports 25 columns, A1 ... O5).
df.head()
Out[13]:
A1 A2 A3 A4 A5 C1 C2 C3 C4 C5 ... N1 N2 N3 N4 N5 O1 O2 O3 O4 O5
0 2.0 4.0 3.0 4.0 4.0 2.0 3.0 3.0 4.0 4.0 ... 3.0 4.0 2.0 2.0 3.0 3.0 6 3.0 4.0 3.0
1 2.0 4.0 5.0 2.0 5.0 5.0 4.0 4.0 3.0 4.0 ... 3.0 3.0 3.0 5.0 5.0 4.0 2 4.0 3.0 3.0
2 5.0 4.0 5.0 4.0 4.0 4.0 5.0 4.0 2.0 5.0 ... 4.0 5.0 4.0 2.0 3.0 4.0 2 5.0 5.0 2.0
3 4.0 4.0 6.0 5.0 5.0 4.0 4.0 3.0 5.0 5.0 ... 2.0 5.0 2.0 4.0 1.0 3.0 3 4.0 3.0 5.0
4 2.0 3.0 3.0 4.0 5.0 4.0 4.0 5.0 3.0 2.0 ... 2.0 3.0 4.0 4.0 3.0 3.0 3 4.0 3.0 3.0

5 rows × 25 columns

In [14]:
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity

# Run Bartlett's sphericity test on the cleaned frame. The call returns a
# pair, unpacked here into the chi-square statistic and its p-value.
bartlett_result = calculate_bartlett_sphericity(df)
chi_square_value, p_value = bartlett_result
(chi_square_value, p_value)
Out[14]:
(18146.065577234804, 0.0)
In [15]:
from factor_analyzer.factor_analyzer import calculate_kmo

# Compute the KMO statistics for the cleaned frame. The result unpacks into
# `kmo_all` and `kmo_model`; only `kmo_model` is displayed.
kmo_result = calculate_kmo(df)
kmo_all, kmo_model = kmo_result
kmo_model
Out[15]:
0.8486452309468382
In [16]:
# Create factor analysis object and perform an unrotated factor analysis that
# extracts as many factors as there are columns, so that an eigenvalue is
# available for every column when choosing the number of factors.
# NOTE(review): `analyze` looks like the pre-0.3 factor_analyzer API; newer
# releases appear to use a scikit-learn style `fit` instead - confirm the
# installed version before re-running.
fa = FactorAnalyzer()
# Take the factor count from the frame itself instead of a hard-coded 25, in
# line with the scree plot, which also sizes its x-axis from df.shape[1].
fa.analyze(df, df.shape[1], rotation=None)
# Check Eigenvalues: `ev` is what gets displayed and plotted; `v` is the second
# return value and is not used in the cells shown here.
ev, v = fa.get_eigenvalues()
ev
Out[16]:
Original_Eigenvalues
0 5.134311
1 2.751887
2 2.142702
3 1.852328
4 1.548163
5 1.073582
6 0.839539
7 0.799206
8 0.718989
9 0.688089
10 0.676373
11 0.651800
12 0.623253
13 0.596563
14 0.563091
15 0.543305
16 0.514518
17 0.494503
18 0.482640
19 0.448921
20 0.423366
21 0.400671
22 0.387804
23 0.381857
24 0.262539
In [17]:
# Create scree plot using matplotlib: eigenvalue against factor number.
factor_numbers = range(1, df.shape[1] + 1)

fig, ax = plt.subplots()
# Markers and a connecting line over the same points.
ax.scatter(factor_numbers, ev.values)
ax.plot(factor_numbers, ev.values)
ax.set_title('Scree Plot')
ax.set_xlabel('Factors')
ax.set_ylabel('Eigenvalue')
# Black horizontal reference line at eigenvalue = 1.
ax.axhline(y=1, color='k')
# End with plt.show() so the cell renders only the figure and does not echo
# the Line2D object returned by axhline.
plt.show()
Out[17]:
<matplotlib.lines.Line2D at 0x10c56c550>
In [18]:
# Create a fresh factor analysis object and fit a 6-factor solution with
# varimax rotation. Six eigenvalues in the table above are greater than 1,
# which is presumably why six factors are requested here - confirm.
fa = FactorAnalyzer()
fa.analyze(df, 6, rotation="varimax")
In [19]:
# Display the loadings table of the current fit (one row per column of df, one column per factor).
fa.loadings
Out[19]:
Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
A1 0.040783 0.095220 0.048734 -0.113057 -0.530987 0.161216
A2 0.235538 0.033131 0.133714 0.063734 0.661141 -0.006244
A3 0.343008 -0.009621 0.121353 0.033990 0.605933 0.160106
A4 0.219717 -0.081518 0.235140 -0.125338 0.404594 0.086356
A5 0.414458 -0.149616 0.106382 0.030977 0.469698 0.236519
C1 0.077248 -0.004358 0.554582 0.190124 0.007511 0.095035
C2 0.038370 0.068330 0.674545 0.087593 0.057055 0.152775
C3 0.031867 -0.039994 0.551164 -0.011338 0.101282 0.008996
C4 -0.066241 0.216283 -0.638475 -0.143846 -0.102617 0.318359
C5 -0.180812 0.284187 -0.544838 0.025837 -0.059955 0.132423
E1 -0.590451 0.022280 0.053915 -0.071205 -0.130851 0.156583
E2 -0.684578 0.233624 -0.088497 -0.045561 -0.116716 0.115065
E3 0.556774 -0.000895 0.103390 0.241180 0.179396 0.267291
E4 0.658395 -0.136788 0.113798 -0.107808 0.241143 0.158513
E5 0.507535 0.034490 0.309813 0.200821 0.078804 0.008747
N1 0.068011 0.805806 -0.051264 -0.074977 -0.174849 -0.096266
N2 0.022958 0.789832 -0.037477 0.006726 -0.141134 -0.139823
N3 -0.065687 0.725081 -0.059039 -0.010664 -0.019184 0.062495
N4 -0.345072 0.578319 -0.162174 0.062916 0.000403 0.147551
N5 -0.161675 0.523097 -0.025305 -0.161892 0.090125 0.120049
O1 0.225339 -0.020004 0.133201 0.479477 0.005178 0.218690
O2 -0.001982 0.156230 -0.086047 -0.496640 0.043989 0.134693
O3 0.325954 0.011851 0.093880 0.566128 0.076642 0.210777
O4 -0.177746 0.207281 -0.005671 0.349227 0.133656 0.178068
O5 -0.014221 0.063234 -0.047059 -0.576743 -0.057561 0.135936
In [20]:
import numpy as np

# Heat map of the absolute loadings of the current fit: one row per item,
# one column per factor. Absolute values are plotted so that the colour
# reflects the size of a loading regardless of its sign.
Z = np.abs(fa.loadings)
fig, ax = plt.subplots()
c = ax.pcolor(Z)
fig.colorbar(c, ax=ax, label='|loading|')
# Ticks are shifted by 0.5 so that each label sits at the centre of its cell.
ax.set_yticks(np.arange(Z.shape[0]) + 0.5, minor=False)
ax.set_xticks(np.arange(Z.shape[1]) + 0.5, minor=False)
ax.set_yticklabels(Z.index.values)
ax.set_xticklabels(Z.columns.values)
# Title and axis labels let the figure stand on its own; the factor count in
# the title is read from the data so it always matches the model being shown.
ax.set_title('Absolute factor loadings ({}-factor solution)'.format(Z.shape[1]))
ax.set_xlabel('Factor')
ax.set_ylabel('Item')
plt.show()
In [21]:
# Create factor analysis object and perform factor analysis using 5 factors
# (varimax rotation), then display the resulting loadings table.
# In the 6-factor table above no item loads higher than about 0.32 on Factor6,
# which is presumably the motivation for dropping to five factors - confirm.
fa = FactorAnalyzer()
fa.analyze(df, 5, rotation="varimax")
fa.loadings
Out[21]:
Factor1 Factor2 Factor3 Factor4 Factor5
A1 0.040465 0.111126 0.022798 -0.077931 -0.428166
A2 0.213716 0.029588 0.139037 0.062139 0.626946
A3 0.317848 0.009357 0.109331 0.056196 0.650743
A4 0.204566 -0.066476 0.230584 -0.112700 0.435624
A5 0.393034 -0.122113 0.087869 0.066708 0.537087
C1 0.070184 0.010416 0.545824 0.209584 0.038878
C2 0.033270 0.089574 0.648731 0.115434 0.102782
C3 0.023907 -0.030855 0.557036 -0.005183 0.111578
C4 -0.064984 0.240410 -0.633806 -0.107535 -0.037498
C5 -0.176395 0.290318 -0.562467 0.036822 -0.047525
E1 -0.574835 0.042819 0.033144 -0.058795 -0.104813
E2 -0.678731 0.244743 -0.102483 -0.042010 -0.112517
E3 0.536816 0.024180 0.083010 0.280877 0.257906
E4 0.646833 -0.115614 0.102023 -0.073422 0.306101
E5 0.504069 0.036145 0.312899 0.213739 0.090354
N1 0.078923 0.786807 -0.045997 -0.084704 -0.216363
N2 0.027301 0.754109 -0.030568 -0.010304 -0.193744
N3 -0.061430 0.731721 -0.067084 -0.004217 -0.027712
N4 -0.345388 0.590602 -0.178902 0.075225 0.005886
N5 -0.161291 0.537858 -0.037309 -0.149769 0.100931
O1 0.213005 -0.002224 0.115080 0.504907 0.061550
O2 0.004560 0.175788 -0.099729 -0.468925 0.081809
O3 0.310956 0.026736 0.076873 0.596007 0.126889
O4 -0.191196 0.220582 -0.021906 0.369012 0.155475
O5 -0.005347 0.085401 -0.062730 -0.533778 -0.010384
In [22]:
# Heat map of the absolute loadings of the current fit: one row per item,
# one column per factor. Absolute values are plotted so that the colour
# reflects the size of a loading regardless of its sign.
Z = np.abs(fa.loadings)
fig, ax = plt.subplots()
c = ax.pcolor(Z)
fig.colorbar(c, ax=ax, label='|loading|')
# Ticks are shifted by 0.5 so that each label sits at the centre of its cell.
ax.set_yticks(np.arange(Z.shape[0]) + 0.5, minor=False)
ax.set_xticks(np.arange(Z.shape[1]) + 0.5, minor=False)
ax.set_yticklabels(Z.index.values)
ax.set_xticklabels(Z.columns.values)
# Title and axis labels let the figure stand on its own; the factor count in
# the title is read from the data so it always matches the model being shown.
ax.set_title('Absolute factor loadings ({}-factor solution)'.format(Z.shape[1]))
ax.set_xlabel('Factor')
ax.set_ylabel('Item')
plt.show()
In [23]:
# Get the variance accounted for by each factor of the current fit; the output
# lists SS loadings, proportion of variance and cumulative variance per factor.
fa.get_factor_variance()
Out[23]:
Factor1 Factor2 Factor3 Factor4 Factor5
SS Loadings 2.473090 2.709633 2.041106 1.522153 1.844498
Proportion Var 0.098924 0.108385 0.081644 0.060886 0.073780
Cumulative Var 0.098924 0.207309 0.288953 0.349839 0.423619
In [ ]: