# The simulation studies presented below are based on the article # http://dergipark.gov.tr/download/article-file/343265 #-----------------------------------------------------------------------------------------# # Study 1: Item Parameter Recovery in IRT install.packages("mirt") library("mirt") itemrecovery <- function(nitem, sample.size, seed) { #Set the seed and generate the parameters set.seed(seed) a <- as.matrix(round(rlnorm(nitem, meanlog = 0.3, sdlog = 0.2),3), ncol=1) b <- as.matrix(round(rnorm(nitem, mean = 0, sd = 1),3), ncol=1) c <- as.matrix(round(rbeta(nitem, shape1 = 20, shape2 = 90),3), ncol=1) ability <- as.matrix(round(rnorm(sample.size, mean = 0, sd = 1),3), ncol=1) #Simulate response data and estimate item parameters dat <- simdata(a = a, d = b, N = sample.size, itemtype = 'dich', guess = c, Theta = ability) model3PL <- mirt(data=dat, 1, itemtype='3PL', SE=TRUE, verbose=FALSE) #Extract estimated item parameters and compute bias and RMSE parameters <- as.data.frame(coef(model3PL, simplify=TRUE)$items) bias.a <- round(mean(parameters[,1]-a), 3) bias.b <- round(mean(parameters[,2]-b), 3) bias.c <- round(mean(parameters[,3]-c), 3) rmse.a <- round(sqrt(mean((parameters[,1]-a)^2)), 3) rmse.b <- round(sqrt(mean((parameters[,2]-b)^2)), 3) rmse.c <- round(sqrt(mean((parameters[,3]-c)^2)), 3) #Combine the results in a single data set result <- data.frame(sample.size=sample.size, nitem=nitem, bias.a=bias.a, bias.b=bias.b, bias.c=bias.c, rmse.a=rmse.a, rmse.b=rmse.b, rmse.c=rmse.c) return(result) } #Generate 100 random integers as seeds myseed <- sample.int(n = 1000000, size = 100) write.csv(myseed, "simulation seeds.txt", row.names = FALSE) #Define an empty data frame to store the simulation results result <- data.frame(sample.size=0, nitem=0, bias.a=0, bias.b=0, bias.c=0, rmse.a=0, rmse.b=0, rmse.c=0) #Run the loop and return the results across 100 iterations for (i in 1:length(myseed)) { result[i,] <- itemrecovery(nitem = 20, sample.size = 1000, seed = myseed[i]) } round(colMeans(result), 3) #-------------------------------------------------------------------------------------# # Study 2: Detecting Differential Item Functioning in Multidimensional IRT install.packages("doParallel") library("doParallel") library("mirt") library("MASS") detectDIF <- function(sample.size, DIF.size, cor, seed) { require("mirt") require("MASS") set.seed(seed) #Define multidimensional abilities for reference and focal groups theta.ref <- mvrnorm(n = sample.size[1], rep(0, 2), matrix(c(1,cor,cor,1),2,2)) theta.foc <- mvrnorm(n = sample.size[2], rep(0, 2), matrix(c(1,cor,cor,1),2,2)) #Generate item parameters for reference and focal groups a1 <- c(runif(n = 15, min = 1.1, max = 2.8), rep(0,15)) a2 <- c(rep(0,15), runif(n = 15, min = 1.1, max = 2.8)) a.ref <- as.matrix(cbind(a1, a2), ncol = 2) b1 <- runif(n = 30, min = 0.67, max = 2) b2 <- b1 - runif(n = 30, min = 0.67, max = 1.34) b3 <- b2 - runif(n = 30, min = 0.67, max = 1.34) b.ref <- as.matrix(cbind(b1, b2, b3), ncol = 3) #Uniform and nonuniform DIF for items 1, 7, 15, 16, 23, and 30 b.foc <- b.ref b.foc[c(1,7,15,16,23,30),] <- b.foc[c(1,7,15,16,23,30),]+DIF.size[1] a.foc <- a.ref a.foc[c(1,7,15),1] <- a.foc[c(1,7,15),1]+DIF.size[2] a.foc[c(16,23,30),2] <- a.foc[c(16,23,30),2]+DIF.size[2] #Generate item responses according to MGRM ref <- simdata(a = a.ref, d = b.ref, itemtype = 'graded', Theta = theta.ref) foc <- simdata(a = a.foc, d = b.foc, itemtype = 'graded', Theta = theta.foc) dat <- rbind(ref, foc) #Define the group variable (0=reference; 1=focal) and test DIF using mirt group <- c(rep("0", sample.size[1]), rep("1", sample.size[2])) itemnames <- colnames(dat) model <- 'f1 = 1-15 f2 = 16-30 COV = F1*F2' model.mgrm <- mirt.model(model) #Test uniform DIF if(DIF.size[1]>0 & DIF.size[2]==0) { mod0 <- multipleGroup(data = dat, model = model.mgrm, group = group, invariance = c(itemnames[-c(1,7,15,16,23,30)], 'free_means', 'free_var'), verbose = FALSE) mod1 <- DIF(mod0, c('d1','d2','d3'), items2test = c(1,7,15,16,23,30)) result <- data.frame(items=c(1,7,15,16,23,30), DIF=c(mod1[[1]][2,8], mod1[[2]][2,8], mod1[[3]][2,8], mod1[[4]][2,8], mod1[[5]][2,8], mod1[[6]][2,8])) } else #Test nonuniform DIF if(DIF.size[1]==0 & DIF.size[2]>0) { mod0 <- multipleGroup(data = dat, model = model.mgrm, group = group, invariance = c(itemnames[-c(1,7,15,16,23,30)], 'free_means', 'free_var'), verbose = FALSE) mod1 <- DIF(mod0, c('a1'), items2test = c(1,7,15)) mod2 <- DIF(mod0, c('a2'), items2test = c(16,23,30)) result <- data.frame(items=c(1,7,15,16,23,30), DIF=c(mod1[[1]][2,8], mod1[[2]][2,8], mod1[[3]][2,8], mod2[[1]][2,8], mod2[[2]][2,8], mod2[[3]][2,8])) } else #Test type I error if(DIF.size[1]==0 & DIF.size[2]==0) { mod0 <- multipleGroup(data = dat, model = model.mgrm, group = group, invariance = c(itemnames[-c(1,7,15,16,23,30)], 'free_means', 'free_var'), verbose = FALSE) mod1 <- DIF(mod0, c('a1','d1','d2','d3'), items2test = c(1,7,15)) mod2 <- DIF(mod0, c('a2','d1','d2','d3'), items2test = c(16,23,30)) result <- data.frame(items=c(1,7,15,16,23,30), DIF=c(mod1[[1]][2,8], mod1[[2]][2,8], mod1[[3]][2,8], mod2[[1]][2,8], mod2[[2]][2,8], mod2[[3]][2,8])) } return(result) } # Set the seed myseed <- sample.int(n=10000, size = 30) detectCores() registerDoParallel(4) # Run the simulation using parallel computing result <- foreach (i = 1:30, .combine=rbind) %dopar% { detectDIF(sample.size=c(1000, 200), DIF.size=c(0.3, 0), cor=0.3, seed=myseed[i]) } round(mean(ifelse(result$DIF < 0.05, 1, 0)),2) #-------------------------------------------------------------------------------------# # Study 3: Investigating Unidimensionality install.packages("sirt") library("sirt") library("mirt") library("MASS") detectDIM <- function(sample.size, testLength1, testLength2, cor, seed) { require("mirt") require("MASS") require("sirt") set.seed(seed) # Generate item discrimination and difficulty parameters a1 <- c(runif(n = testLength1, min = 1.1, max = 2.8), rep(0, testLength2)) a2 <- c(rep(0, testLength1), runif(n = testLength2, min = 1.1, max = 2.8)) disc.matrix <- as.matrix(cbind(a1, a2), ncol = 2) difficulty <- runif(n = (testLength1 + testLength2), min = 0.67, max = 2) # Specify inter-dimensional correlations sigma <- matrix(c(1, cor, cor, 1), 2, 2) # Simulate data dataset <- simdata(disc.matrix, difficulty, sample.size, itemtype = 'dich', sigma = sigma) # Analyze the simulated data by fitting a unidimensional model noharmOneFactorSolution <- noharm.sirt(dat = dataset , dimensions = 1) # Summarize the results result <- data.frame(sample.size = sample.size , testLength1 = testLength1, testLength2 = testLength2, cor = cor, RMSEA = noharmOneFactorSolution$rmsea) return(result) } # Set the seed myseed <- sample.int(n = 1000000, size = 100) write.csv(myseed, "simulation seeds.csv", row.names = FALSE) write.table(matrix(c("Sample Size", "Test Length 1", "Test Length 2", "Correlation","Mean RMSEA"), 1, 5), "results.csv", sep = ",", col.names = FALSE, row.names = FALSE) # Create an empty data frame to save the results result <- data.frame(sample.size = 0, testLength1 = 0, testLength2 = 0, cor = 0, RMSEA = 0) # Run all the input values through loops sample.size <- c(500, 1000, 3000) test.length2 <- c(10, 20, 30) correlation <- c(0.3, 0.6, 0.9) for (ss in 1:length(sample.size)) { for (tl in 1:length(test.length2)) { for (k in 1:length(correlation)) { for (i in 1:length(myseed)) { result[i, ] <-detectDIM(sample.size = sample.size[ss],testLength1 = 30, testLength2 = test.length2[tl], cor = correlation[k], seed = myseed[i]) } meanRMSEAs <- round(colMeans(result), 3) write.table(matrix(meanRMSEAs, 1, 5), "results.csv", sep = ",", col.names = FALSE, row.names = FALSE, append = TRUE) } } } # Read in the summary results in summary <- read.csv("results.csv", header = TRUE) summary$Sample.Size <- factor(paste0("Sample Size=",summary$Sample.Size), levels = c("Sample Size=500", "Sample Size=1000", "Sample Size=3000")) summary$Test.Length.2 <- factor(paste0("Test Length 2=",summary$Test.Length.2)) summary$Correlation <- factor(summary$Correlation) library(lattice) dotplot(Correlation ~ Mean.RMSEA | Sample.Size*Test.Length.2 , data = summary, pch=c(2), cex=1.5, xlab = "Average RMSEA", aspect=0.5, layout = c(3,3), ylab="Correlation", xlim=c(0,0.3))