
# install.packages("difNLR")
library(difNLR)
# install.packages("ggplot2")
library(ggplot2)

#-----------------------------------------------------------------------------
#                         IMPLEMENTATION IN EXAMPLES                         
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# DIF DETECTION AMONG BINARY DATA
#-----------------------------------------------------------------------------

#-----------------
# DATA GENERATION
#-----------------

# setting parameters
# discrimination
a <- matrix(rep(c(1.00, 1.12, 1.45, 1.25, 1.32, 1.38, 1.44, 0.89, 1.15,
                  1.30, 1.29, 1.46, 1.16, 1.26, 0.98), 2), ncol = 2)
# difficulty
b <- matrix(rep(c(1.34, 0.06, 1.62, 0.24, -1.45, -0.10, 1.76, 1.96, -1.53,
                  -0.44, -1.67, 1.91, 1.62, 1.79, -0.21), 2), ncol = 2)
# guessing
c <- matrix(rep(c(0.00, 0.00, 0.00, 0.00, 0.00, 0.17, 0.18, 0.05, 0.10,
                  0.11, 0.15, 0.20, 0.21, 0.23, 0.24), 2), ncol = 2)
# inattention
d <- matrix(rep(c(1.00, 1.00, 1.00, 0.92, 0.87, 1.00, 1.00, 0.88, 0.93,
                  0.94, 0.81, 0.98, 0.87, 0.96, 0.85), 2), ncol = 2)

# introducing DIF in items 5, 8, 11 and 15
b[5, 2] <- b[5, 2] + 1
a[8, 2] <- a[8, 2] + 1
d[11, 2] <- 1
c[15, 2] <- 0

# generating dichotomous data with parameters a, b, c, d
set.seed(42)
df <- genNLR(N = 1000, a = a, b = b, c = c, d = d)
head(df[, c(1:5, 16)])
DataDIF <- df[, 1:15]
groupDIF <- df[, 16]

#-----------------
# BASIC DIF DETECTION
#-----------------

# performing DIF detection based on 4PL model
(fit1 <- difNLR(DataDIF, groupDIF, focal.name = 1, model = "4PL", type = "all"))

# estimated parameters
round(coef(fit1, simplify = TRUE), 3)
# estimated parameters with SE for item 5
round(coef(fit1, SE = TRUE)[[5]], 3)

# plot of characteristic curves of DIF items
plot(fit1, item = fit1$DIFitems)

# performing DIF detection with item specific models, types and/or constraints
# item specific model
model <- c("1PL", rep("2PL", 2), rep("3PL", 2), rep("3PLd", 2), rep("4PL", 8))
fit2 <- difNLR(DataDIF, groupDIF, focal.name = 1, model = model, type = "all")
fit2$DIFitems

# item specific type
type <- rep("all", 15)
type[5] <- "b"; type[8] <- "a"; type[11] <- "c"; type[15] <- "d"
fit3 <- difNLR(DataDIF, groupDIF, focal.name = 1, model = model, type = type)
fit3$DIFitems

# item specific constraints
constraints <- rep(NA, 15)
constraints[5] <- "ac"; constraints[8] <- "bcd";
constraints[11] <- "abd"; constraints[15] <- "abc"
fit4 <- difNLR(DataDIF, groupDIF, focal.name = 1, model = model,
               constraints = constraints, type = type)
fit4$DIFitems

# fit measures - AIC, BIC
df <- data.frame(AIC = c(AIC(fit2), AIC(fit3), AIC(fit4)),
                 BIC = c(BIC(fit2), BIC(fit3), BIC(fit4)),
                 Fit = paste("fit", rep(2:4, each = 15), sep = ""),
                 Item = as.factor(rep(1:15, 3)))

(g2a <- ggplot(df, aes(x = Item, y = AIC, col = Fit)) +
    geom_point())
(g2b <- ggplot(df, aes(x = Item, y = BIC, col = Fit)) +
    geom_point())

# fit measures are item specific
logLik(fit3, item = 8)
logLik(fit4, item = 8)

# predicted values
predict(fit1, item = 5, group = 0, match = 0)
predict(fit1, item = 5, group = 1, match = 0)

#-----------------------------------------------------------------------------
# DIF DETECTION AMONG ORDINAL DATA
#-----------------------------------------------------------------------------
#-----------------
# DATA GENERATION
#-----------------

# setting parameters
set.seed(42)
# discrimination
a <- matrix(rep(runif(5, 0.25, 1), 8), ncol = 8)
# difficulty
b <- t(sapply(1:5, function(i) rep(sort(runif(4, -1, 1)), 2)))

# introducing DDF in items 1 and 2
b[1, 5:8] <- b[1, 5:8] + 0.1
a[2, 5:8] <- a[2, 5:8] - 0.2

# generating ordinal data with parameters a and b
DataORD <- genNLR(N = 1000, itemtype = "ordinal", a = a, b = b)
summary(DataORD)

#-----------------
# DIF DETECTION WITH CUMULATIVE LOGIT MODEL
#-----------------

# fitting DIF cumulative logit model
(fit5 <- difORD(DataORD, group = "group", focal.name = 1, model = "cumulative"))

# plot of cumulative probabilities of DDF items
plot(fit5, item = "Item1", plot.type = "cumulative")
# plot of characteristic curves of DDF items
plot(fit5, item = "Item1", plot.type = "category")

#-----------------
# DIF DETECTION WITH ADJACENT LOGIT MODEL
#-----------------

# fitting DIF adjacent category logit model
(fit6 <- difORD(DataORD, group = 6, focal.name = 1, model = "adjacent"))

# plot of characteristic curves of DDF items
plot(fit6, item = fit6$DIFitems)

# using classic intercept-slope parametrization
fit6a <- difORD(DataORD, group = 6, focal.name = 1, model = "adjacent",
                parametrization = "classic")

# coefficients with IRT parametrization
round(coef(fit6)[[1]], 3)
# coefficients with classic intercept-slope parametrization
round(coef(fit6a)[[1]], 3)

#-----------------------------------------------------------------------------
# DDF DETECTION AMONG NOMINAL DATA
#-----------------------------------------------------------------------------

#-----------------
# DATA GENERATION
#-----------------

# setting parameters
set.seed(42)
# discrimination
a <- matrix(rep(runif(30, -2, -0.5), 2), ncol = 6)
a[1:5, c(3, 6)] <- NA
# difficulty
b <- matrix(rep(runif(30, -3, 1), 2), ncol = 6)
b[1:5, c(3, 6)] <- NA

# introducing DDF in items 1 and 6
a[1, 4] <- a[1, 1] - 1; a[1, 5] <- a[1, 2] + 1
b[6, 4] <- b[6, 1] - 1; b[6, 5] <- b[6, 2] - 1.5

# generating nominal data with parameters a and b
DataDDF <- genNLR(N = 1000, itemtype = "nominal", a = a, b = b)
head(DataDDF)

#-----------------
# DDF DETECTION
#-----------------

# fitting DDF model
(fit7 <- ddfMLR(DataDDF, group = "group", focal.name = 1, key = rep("A", 10)))

# plot of characteristic curves of DDF items
plot(fit7, item = fit7$DDFitems, group.names = c("Group 1", "Group 2"))

#-----------------------------------------------------------------------------
# FURTHER FEATURES
#-----------------------------------------------------------------------------

#-----------------
# ANCHOR ITEMS
#-----------------

fit8a <- difNLR(DataDIF[, 1:7], groupDIF, focal.name = 1, match = "score", 
                model = "4PL", type = "all")
fit8a$DIFitems

fit8b <- difNLR(DataDIF[, 1:7], groupDIF, focal.name = 1, match = "score", 
                model = "4PL", type = "all", anchor = c(1:4, 7))
fit8b$DIFitems

#-----------------
# ITEM PURIFICATION
#-----------------

fit9 <- difNLR(DataDIF[, 1:7], groupDIF, focal.name = 1, match = "score", 
               model = "4PL", type = "all", purify = TRUE)

# purification scheme
fit9$difPur

#-----------------
# MULTIPLE COMPARISON CORRECTIONS
#-----------------

# Holm's p-value adjustment
fit10 <- difNLR(DataDIF[, 1:7], groupDIF, focal.name = 1, match = "score", 
                model = "4PL", type = "all", p.adjust.method = "holm")
fit10$DIFitems


# combining item purification and Holm's p-value adjustment
fit11 <- difNLR(DataDIF[, 1:7], groupDIF, focal.name = 1, match = "score", 
                model = "4PL", type = "all", p.adjust.method = "holm", 
                purify = T)
fit11$DIFitems

# comparing significance
round(fit9$pval, 3)
round(fit10$adj.pval, 3)
round(fit11$adj.pval, 3)

#-----------------------------------------------------------------------------
# TROUBLE SHOOTING
#-----------------------------------------------------------------------------

# issues with convergence
# sampled data
set.seed(42)
sam <- sample(1:1000, 420)
# using re-calculation of starting values
fit12a <- difNLR(DataDIF[sam, ], groupDIF[sam], focal.name = 1, model = "4PL",
                 type = "all")
# turn off option of re-calculating starting values
fit12b <- difNLR(DataDIF[sam, ], groupDIF[sam], focal.name = 1, model = "4PL",
                 type = "all", initboot = FALSE)

# with maximum likelihood estimation
fit13 <- difNLR(DataDIF[sam, ], groupDIF[sam], focal.name = 1, model = "4PL",
                type = "all", method = "likelihood")

# issues with item purification
fit14 <- difNLR(DataDIF[, 1:12], groupDIF, focal.name = 1, model = "4PL",
                type = "all", purify = TRUE)
fit14$difPur

#-----------------------------------------------------------------------------
#                             REAL DATA EXAMPLE
#-----------------------------------------------------------------------------

# loading LearningToLearn data
data("LearningToLearn", package = "ShinyItemAnalysis")

# dichotomous items for 6th grade
LtL6_gr6 <- LearningToLearn[, c("track", paste0("Item6", LETTERS[1:8], "_6"))]
head(LtL6_gr6)

# standardized total score achieved in 6th grade
zscore6 <- scale(LearningToLearn$score_6)

#--------------------------
# DIF in 6th grade - binary data
#--------------------------

# fit 3PL model using stand. total score from 6th grade
fitex1 <- difNLR(Data = LtL6_gr6, group = "track", focal.name = "AS", model = "3PL",
                 match = zscore6)
fitex1$DIFitems

# testing difference only in guessing
fitex2 <- difNLR(Data = LtL6_gr6[, c(1, 9)], group = "track", focal.name = "AS", model = "3PL",
                 type = "c", match = zscore6)
fitex2$DIFitems

# plot of characteristic curves of DDF items
plot(fitex2, item = fitex2$DIFitems)

#--------------------------
# DIF in 9th grade - binary data
#--------------------------

# dichotomous items for 9th grade
LtL6_gr9 <- LearningToLearn[, c("track", paste0("Item6", LETTERS[1:8], "_9"))]
head(LtL6_gr9)

# fit 3PL model using stand. total score from 6th grade
fitex3 <- difNLR(Data = LtL6_gr9, group = "track", focal.name = "AS", model = "3PL",
                 match = zscore6)
fitex3$DIFitems

# predicted values for item 1 and students with low, average, and high
# standardized total score in BS and AS track
predict(fitex3, match = c(-1, 0, 1), group = rep(0, 3), item = 1) # BS track
predict(fitex3, match = c(-1, 0, 1), group = rep(1, 3), item = 1) # AS track

#--------------------------
# DDF in changes - nominal data
#--------------------------

# nominal data for changes between 6th ang 9th grade
LtL6_change <- LearningToLearn[, c("track", paste0("Item6", LETTERS[1:8], "_changes"))]
summary(LtL6_change[, 1:4])

# fit nominal model, key is vector of pattern "11"
fitex4 <- ddfMLR(Data = LtL6_change, group = "track", focal.name = "AS", 
                 key = rep("11", 8), match = zscore6)
fitex4$DDFitems

# plot of characteristic curves of DDF items
plot(fitex4, item = fitex4$DDFitems)

#--------------------------
# DIF in changes - ordinal data
#--------------------------

# ordinal data for change between 6th and 9th grade
LtL6_change_ord <- data.frame(
  track = LtL6_change$track,
  sapply(LtL6_change[, -1], 
         function(x) as.factor(ifelse(x == "10", 0, ifelse(x == "01", 2, 1))))
)
summary(LtL6_change_ord[, 1:4])

# fit adjacent category logit model
fitex5 <- difORD(Data = LtL6_change_ord, group = "track", focal.name = "AS",
                 model = "adjacent", match = zscore6)
fitex5$DIFitems

# plot of characteristic curves of DIF items
plot(fitex5, item = fitex5$DIFitems)
