Frequency and Response Latencies with Regression

Erin M. Buchanan

Last Knitted: 2021-12-22

Language Topics Discussed

English Lexicon Project

Subtitle Projects

The Semantic Priming Project

Regression

Understand Regression Models

\[ y_i = b_0 + b_1x_{1i} + b_2x_{2i} + \ldots + \epsilon_i \]
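
A minimal sketch of how this equation maps onto lm() in R, using simulated data (all variable names and values here are hypothetical):

set.seed(1)
x1 <- rnorm(100) # first predictor
x2 <- rnorm(100) # second predictor
y <- 2 + 3*x1 - 1.5*x2 + rnorm(100) # true b0 = 2, b1 = 3, b2 = -1.5
coef(lm(y ~ x1 + x2)) # estimates should land near the true values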

Examples Using ELP

library(Rling)
data(ELP)
head(ELP)
##         Word Length SUBTLWF POS Mean_RT
## 1    rackets      7    0.96  NN  790.87
## 2 stepmother     10    4.24  NN  692.55
## 3 delineated     10    0.04  VB  960.45
## 4   swimmers      8    1.49  NN  771.13
## 5     umpire      6    1.06  NN  882.50
## 6      cobra      5    3.33  NN  645.85
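
Note that Rling is the companion package to Levshina's How to Do Linguistics with R and is not on CRAN; if library(Rling) fails, install the package from the source archive distributed with the book (the file name below is an assumption about your local copy):

# install.packages("Rling_1.0.tar.gz", repos = NULL, type = "source")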

Dealing with Categorical Predictors

Dummy Coding

table(ELP$POS)
## 
##  JJ  NN  VB 
## 159 532 189

Dummy Coding

ELP$POS <- factor(ELP$POS, # the column you want to update
                  # the values in the data, in the order you want
                  levels = c("NN", "JJ", "VB"),
                  # give them better labels if you want
                  labels = c("Noun", "Adjective", "Verb"))
table(ELP$POS)
## 
##      Noun Adjective      Verb 
##       532       159       189
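
Because Noun is now the first level, it becomes the reference group; you can inspect the dummy codes R will build directly:

contrasts(ELP$POS)
##           Adjective Verb
## Noun              0    0
## Adjective         1    0
## Verb              0    1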

Dealing with Non-Normal Data

hist(ELP$SUBTLWF, breaks = 100) # raw frequencies are strongly right skewed

Dealing with Non-Normal Data

ELP$Log_SUB <- log(ELP$SUBTLWF) # natural log of the SUBTLEX word frequency
hist(ELP$Log_SUB)
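
One caution for other datasets (the ELP values here appear to be all positive, as the histogram suggests): log(0) is -Inf, so zero frequencies would need a shifted transform first, e.g.:

min(ELP$SUBTLWF) # confirm there are no zeros before logging
# ELP$Log_SUB <- log1p(ELP$SUBTLWF) # log(x + 1) alternative if zeros exist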

Build the Linear Model

model <- lm(Mean_RT ~ Length + Log_SUB + POS,
            data = ELP)

Summarize the Linear Model

summary(model)
## 
## Call:
## lm(formula = Mean_RT ~ Length + Log_SUB + POS, data = ELP)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -213.70  -62.55   -9.71   53.87  389.00 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   616.351     12.233  50.385  < 2e-16 ***
## Length         19.555      1.433  13.645  < 2e-16 ***
## Log_SUB       -29.288      1.784 -16.420  < 2e-16 ***
## POSAdjective    6.115      8.506   0.719  0.47238    
## POSVerb       -23.069      7.918  -2.913  0.00367 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 93.29 on 875 degrees of freedom
## Multiple R-squared:  0.4565, Adjusted R-squared:  0.454 
## F-statistic: 183.7 on 4 and 875 DF,  p-value: < 2.2e-16

Residuals

summary(model$residuals)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -213.699  -62.551   -9.714    0.000   53.874  388.998

Coefficients

options(scipen = 999) # turn off scientific notation in the output
round(summary(model)$coefficients, 3)
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)   616.351     12.233  50.385    0.000
## Length         19.555      1.433  13.645    0.000
## Log_SUB       -29.288      1.784 -16.420    0.000
## POSAdjective    6.115      8.506   0.719    0.472
## POSVerb       -23.069      7.918  -2.913    0.004
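
To see what these coefficients imply on the response scale, you can ask the model for a fitted value; for example, a hypothetical 7-letter noun with a log frequency of 0 (i.e., SUBTLWF = 1):

predict(model,
        newdata = data.frame(Length = 7, Log_SUB = 0, POS = "Noun"))

Because the other terms drop out, this is just 616.351 + 19.555 * 7, or roughly 753 ms.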

Coefficients

tapply(ELP$Mean_RT, # DV
       ELP$POS, # IV grouping variable
       mean) # function to apply
##      Noun Adjective      Verb 
##  787.5959  822.9145  754.3316

Note that these raw group means do not line up exactly with the dummy-coded coefficients above, because the regression estimates adjust the POS effects for Length and Log_SUB.

Coefficient Confidence Intervals

confint(model)
##                  2.5 %    97.5 %
## (Intercept)  592.34193 640.36007
## Length        16.74194  22.36757
## Log_SUB      -32.78872 -25.78704
## POSAdjective -10.57915  22.80935
## POSVerb      -38.61021  -7.52737

Coefficient Practical Importance

\[ pr_{x_i} = \frac{t_{x_i}}{\sqrt{t_{x_i}^2 + df_{res}}} \]

Each predictor's t value converts to a partial correlation pr; squaring it estimates the unique variance that predictor accounts for.

t <- summary(model)$coefficients[-1, 3] # t values, dropping the intercept
pr <- t / sqrt(t^2 + model$df.residual) # partial correlations
pr^2 # squared partial correlations
##       Length      Log_SUB POSAdjective      POSVerb 
## 0.1754422571 0.2355448602 0.0005903474 0.0096065265

Overall Model

summary(model)$fstatistic
##    value    numdf    dendf 
## 183.7024   4.0000 875.0000

Overall Model

summary(model)$r.squared
## [1] 0.4564574
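
summary() prints the F statistic but does not store its p value; you can recompute it from the stored pieces:

f <- summary(model)$fstatistic
pf(f[1], f[2], f[3], lower.tail = FALSE) # p value for the omnibus F test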

Diagnostic Tests

Outliers

library(car)
influencePlot(model) # studentized residuals vs. hat values, sized by Cook's distance

##        StudRes         Hat       CookD
## 16   1.0366190 0.027128347 0.005992376
## 207 -0.5110465 0.030963411 0.001670423
## 331  2.9961984 0.020366762 0.036990327
## 411  3.8500613 0.002244803 0.006566174
## 498  4.2181812 0.004047706 0.014190408
## 660  3.7238861 0.008750676 0.024129118
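
To flag influential cases programmatically rather than by eye, one common rule of thumb (a convention, not a hard cutoff) is Cook's distance above 4/n:

cd <- cooks.distance(model)
which(cd > 4 / nrow(ELP)) # rows with potentially outsized influence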

Outliers

ELP[c(331,660,498,411), ]
##                  Word Length SUBTLWF       POS Mean_RT    Log_SUB
## 331 interdepartmental     17    0.04 Adjective 1324.57 -3.2188758
## 660      sacrilegious     12    0.39 Adjective 1228.06 -0.9416085
## 498           whippet      7    0.10      Noun 1209.67 -2.3025851
## 411         archenemy      9    0.25      Noun 1188.91 -1.3862944

Assumptions

Additivity

summary(model, correlation = TRUE)$correlation[, -1]
##                   Length      Log_SUB POSAdjective      POSVerb
## (Intercept)  -0.94238493 -0.272940500  -0.09904041 -0.232585764
## Length        1.00000000  0.340416664  -0.05527619  0.066688453
## Log_SUB       0.34041666  1.000000000   0.09363100  0.006852748
## POSAdjective -0.05527619  0.093631001   1.00000000  0.237177357
## POSVerb       0.06668845  0.006852748   0.23717736  1.000000000

Additivity

vif(model)
##             GVIF Df GVIF^(1/(2*Df))
## Length  1.151054  1        1.072872
## Log_SUB 1.150140  1        1.072446
## POS     1.026925  2        1.006664

Linearity

plot(model, which = 2) # Q-Q plot of the residuals; a straight line supports linearity

Normality

hist(scale(residuals(model))) # standardized residuals should look roughly normal
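
If you want a formal test to accompany the histogram, base R's Shapiro-Wilk test applies to the residuals (though it is very sensitive at this sample size):

shapiro.test(residuals(model))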

Homoscedasticity/Homogeneity

plot(model, which = 1) # residuals vs. fitted values

Homoscedasticity/Homogeneity

{plot(scale(residuals(model)), scale(model$fitted.values)) # standardized residuals vs. fitted
  abline(v = 0, h = 0)} # reference lines at zero
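
car also provides a score test of constant error variance to complement the visual check:

ncvTest(model) # Breusch-Pagan style test of non-constant variance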

One Solution to Bad Assumptions

bootcoef <- function(formula, data, indices){
  d <- data[indices, ] # resample rows (with replacement) via boot's indices
  model <- lm(formula, data = d) # refit the model on the resampled data
  return(coef(model)) # return this replicate's coefficients
}

Bootstrapping

library(boot)
model.boot <- boot(formula = Mean_RT ~ Length + Log_SUB + POS,
                   data = ELP,
                   statistic = bootcoef,
                   R = 1000)
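
Resampling is random, so these estimates will shift slightly from run to run; for reproducible numbers, set a seed (any fixed value) immediately before calling boot():

set.seed(2021) # arbitrary seed chosen for illustration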

Bootstrapping

model.boot
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = ELP, statistic = bootcoef, R = 1000, formula = Mean_RT ~ 
##     Length + Log_SUB + POS)
## 
## 
## Bootstrap Statistics :
##       original      bias    std. error
## t1* 616.350998 -0.25564332   12.557699
## t2*  19.554757  0.05627581    1.548970
## t3* -29.287879  0.06915177    1.752018
## t4*   6.115101 -0.01991016    9.160950
## t5* -23.068792 -0.10379626    7.039374

CIs for Bootstrapped Estimates

boot.ci(model.boot, index = 3)
## Warning in boot.ci(model.boot, index = 3): bootstrap variances needed for
## studentized intervals
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = model.boot, index = 3)
## 
## Intervals : 
## Level      Normal              Basic         
## 95%   (-32.79, -25.92 )   (-32.83, -25.81 )  
## 
## Level     Percentile            BCa          
## 95%   (-32.77, -25.74 )   (-32.82, -25.81 )  
## Calculations and Intervals on Original Scale
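
boot.ci() works one coefficient at a time (index = 3 is Log_SUB, matching t3* above); to pull, say, percentile intervals for every coefficient, loop over the indices:

lapply(1:5, function(i) boot.ci(model.boot, index = i, type = "perc"))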

Summary