############################################################
#    Code in section :
#     Demonstration of the CMatching package on NELS-88 data" (page 5)
############################################################

options(digits=3)

# install packages
install.packages(c("Matching", "CMatching"))
library(Matching)
library(CMatching)

# load and attach data
data(schools)
attach(schools)
#str(schools)

# define tretment, outcome and group variable
 T <- ifelse(homework>1,1,0)
 Y <- math
 
# group
Group <- schid

# describe treated by groups
round(prop.table((table(Group, T)),1), digits=2)
addmargins(table(Group, T))

# #From the table above we can notice that the total school sample size is 
# fairly homogeneous with the exception of one school (code = 62821  ) where
# the number of treated students (52) is considerably higher than the number 
# of control students (15). Within-cluster matching can be difficult in groups where
# the proportion of treated units is high because there are relatively few controls
# that can potentially serve as a match. Preferential-within cluster matching would
# be less restrictive.


# estimate propensity score model
pmodel <- glm(T~ses+as.factor(sex)+white+public, family=binomial(link="logit"))

# predicted probabilities of treatment for each student:
eps <- fitted(pmodel)

# Within-cluster propensity score matching
psm_w <- CMatch(type="within", Y=Y, Tr=T, X=eps, Group=Group)

# summarize output
summary(psm_w)

# % of drops
psm_w$ndrops/psm_w$orig.treated.nobs

# % of drops by group
psm_w$orig.dropped.nobs.by.group/table(Group)
#0*table(Group)+psm_w$orig.ndrops.by.group[names(table(Group))]/table(Group)

# balance analysis
b_psm_w <- CMatchBalance(T~ses+as.factor(sex)+white+public,data=schools,match.out=psm_w)

# balance improved for the ses, sex and public variables. In fact, for the latter it was possible to attain exact matching. This is guaranteed by within-cluster matching because it forces treated and control students to belong to the same school. Importantly, it is true also for all other school-level variables (even unobservable).

# However, white balance  did not improve and it was not possible to match 9 observations
# drops can be partly due to forcing the matching to happen only within clusters.

# Preferential within-cluster propensity score matching
 psm_pw <- CMatch(type="pwithin", Y=Y, Tr=T, X=eps, Group=Group)

 summary(psm_pw)
 
# balance analysis after pref within matchng
> b_psm_pw <- CMatchBalance(T~ses+as.factor(sex)+white+public,data=schools,match.out=psm_pw)

# As expected preferential within-cluster matching improves in terms of reduced unmatched units. As we already noticed
# On the other hand, within-cluster matching guarantees a perfect balance of school-level variable (both observed and unobserved). 

# Subsection: Alternative implementations of propensity score matching
# a) Multivariate covariate matching
# The sintax is very similar to propensity score matching.

mal_w <- CMatch(type="within", Y=Y, Tr=T, X=cbind(ses, sex, white, public), Group=Group)

summary(mal_w)

b_mal_w <- CMatchBalance(T~ses+as.factor(sex)+white+public,data=schools,match.out=mal_w)

# b) Subsection: Estimating the propensity score with hierarchical models
# 
mod <- glm(T ~ ses + parented + public + sex + race + urban
    +schid - 1,family=binomial(link="logit"),data=schools)
    eps <- fitted(mod)
 # match within on eps   
    dpsm <- CMatch(type="within",Y=math, Tr=T, X=eps, Group=NULL, caliper=0.1)

# code in section: 
#  A simple usage example (page 4)
# toy data set

id <-c(1,2,3,4,5, 6,7,8,9,10)
 g<-c( 1,1,1,1,1, 2,2,2,2, 2 )
 t<- c( 1,1,0,0,0, 1,1,0,0, 0 )
 x <- c(1,2,1,5,2.5,3,4,2,4, 5 )
toy<-t(data.frame(id,c, t,x))

#pooled matching
  pm <- Match(Y=NULL, Tr=t, X=x, caliper=0.4)
# same output as before (with a warning about the absence of groups)
 pm <- CMatch(type="within",Y=NULL, Tr=t, X=x, Group=NULL, caliper=0.4)
 
 #within matching 
  wm <- CMatch(type="within",Y=NULL, Tr=t, X=x, Group=g, caliper=0.4)
#preferential-within matching
  pwm <- CMatch(type="pwithin",Y=NULL, Tr=t, X=x, Group=g, caliper=0.4)

# summarize results  
 summary(mw)
 
 # check balance of x after within matching
CMatchBalance( t~x , match.out=wm)

