# Shuowen Chen
# 11/6/2019
# The code script reproduces the examples in submission "Sorted Effects in R: A Vignette"

install.packages("SortedEffects")
library("SortedEffects")

# We use the package "xtable" to tabulate the results.
install.packages("xtable")
library("xtable")

# Application 1
data("mortgage")

# Reproduce figure 1
fm <- deny ~ black + p_irat + hse_inc + ccred + mcred + pubrec +
  ltv_med + ltv_high + denpmi + selfemp + single + hischl
test <- spe(fm = fm, data = mortgage, var = "black", method = "logit",
            us = c(2:98)/100, b = 500, bc = TRUE)
plot(x = test, ylim = c(0, 0.25), ylab = "Change in Probability",
     main = "APE and SPE of Being Black on the Prob of Mortgage Denial",
     sub = "Logit Model")
# Table 1
summary(test, result = "average")
# Table 2
summary(test)
# Table 3
t <- c("deny", "p_irat", "black", "hse_inc", "ccred", "mcred", "pubrec",
       "denpmi", "selfemp", "single", "hischl", "ltv_med", "ltv_high")
CA <- ca(fm = fm, data = mortgage, var = "black", method = "logit",
         cl = "both", t = t, b = 500, bc = TRUE)
summary(CA)
# Table 4
CAdiff <- ca(fm = fm, data = mortgage, var = "black", t = t,
             method = "logit", cl = "diff", b = 500, bc = TRUE)
summary(CAdiff)
# Figure 2
t2 <- c("p_irat", "hse_inc")
CAdist <- ca(fm = fm, data = mortgage, var = "black", method = "logit",
             t = t2, b = 500, interest = "dist")
plot(CAdist, var = "p_irat", ylab = "Prob",
     xlab = "Monthly Debt-to-Income Ratio", sub = "logit model")
plot(CAdist, var = "hse_inc", ylab = "Prob",
     xlab = "Monthly Housing Expenses-to-Income Ratio",
     sub = "logit model")
# Table 5
set_b <- subpop(fm, data = mortgage, method = "logit", var = "black",
                u = 0.1, alpha = 0.1, b = 500)
groups <- summary(set_b, vars = c("p_irat", "hse_inc"))
most_affected <- groups$most_affected
xtable(most_affected)
# Table 6
sum_stats_most <- groups$stats_most
xtable(sum_stats_most)
# Figure 3
plot(set_b, varx = mortgage$p_irat, vary = mortgage$hse_inc,
     xlim = c(0, 1.5), ylim = c(0, 1.5), xlab = "Debt/Income",
     ylab = "Housing expenses/Income", overlap = TRUE)

# Application 2
data(wage2015)
wage2015$male <- 1 - wage2015$female
fmla1 <- lnw ~ male*(ms + (exp1 + exp2 + exp3 + exp4)*educ + occ +
                       ind + region)
# figure 4
gap <- spe(fm = fmla1, data = wage2015, samp_weight = wage2015$weight,
           var = "male", subgroup = wage2015[,"female"] == 1,
           boot_type = "weighted", us = c(2:98)/100, b = 500, bc = FALSE)

plot(x = gap, main = "APE and SPE of Gender Wage Gap for Women",
     sub = "OLS Model", xlab = "Percentile Index",
     ylab = "Gender Wage Gap", ylim = c(-0.1, 0.45))
# figure 5
fem_mar <- wage2015[, "female"] == 1 & wage2015[, "ms"] == "married"
fem_nev <- wage2015[, "female"] == 1 & wage2015[, "ms"] == "nevermarried"

gap_mar <- spe(fm = fmla1, data = wage2015, samp_weight = wage2015$weight,
               var = "male", subgroup = fem_mar, us = c(2:98)/100, b = 500,
               bc = FALSE, boot_type = "weighted")

gap_nev <- spe(fm = fmla1, data = wage2015, samp_weight = wage2015$weight,
               var = "male", subgroup = fem_nev, us = c(2:98)/100, b = 500,
               bc = FALSE, boot_type = "weighted")

plot(x = gap_mar, main = "Married Women", sub = "OLS Model",
     xlab = "Percentile Index", ylab = "Gender Wage Gap",
     ylim = c(-0.2, 0.45))

plot(x = gap_nev, main = "Never Married Women", sub = "OLS Model",
     xlab = "Percentile Index", ylab = "Gender Wage Gap",
     ylim = c(-0.2, 0.45))

# Table 7
tw <- c("lnw", "female", "ms", "educ", "region", "exp1", "occ", "ind")
cat <- c("ms", "educ", "region", "occ", "ind")
Char <- ca(fm = fmla1, data = wage2015, samp_weight = wage2015$weight,
           var = "male", t = tw, cl = "both", b = 500,
           subgroup = wage2015[,"female"] == 1, boot_type = "weighted",
           bc = FALSE, u = 0.05)
summary(Char)
# Table 8
Chardiff <- ca(fm = fmla1, data = wage2015, samp_weight = wage2015$weight,
               var = "male", t = tw, cl = "diff", b = 500, cat = cat,
               subgroup = wage2015[,"female"] == 1, boot_type = "weighted",
               bc = FALSE, u = 0.05)
summary(Chardiff)
# Figure 6
set <- subpop(fm = fmla1, data = wage2015, var = "male",
              samp_weight = wage2015$weight, boot_type = "weighted",
              b = 500, subgroup = wage2015[, "female"] == 1, u = 0.05)

plot(set, varx = wage2015$exp1, vary = wage2015$lnw,
     main = "Projections of Exp-lnw", sub = "OLS", xlab = "Exp",
     ylab = "Log Wages")

plot(set, varx = wage2015$exp1, vary =wage2015$ms,
     main = "Projections of Exp-MS", sub = "OLS", xlab = "Exp",
     ylab = "Marital Status")
