
## == This code reproduces the entire example in the article. The ArCo source ==##
## == code was submitted with the paper in a tar.gz file. == ##

## == Note that the package can be installed directly from CRAN with install.packages("ArCo") = ##

## == The package imports the packages Matrix, glmnet and boot. These packages are also on CRAN.== ##
## == The boot and the Matrix packages are already installed on most R distributions == ##
## == The glmnet package must be instaled to run this code == ##


## =================================================================================== ##
## ============================Example for data.q1==================================== ##
## =================================================================================== ##

library(ArCo)
data(data.q1)
?data.q1

plot(data.q1[,1],type="l")
abline(v=51,col=2,lty=2)

fn=function(X,y){
  return(lm(y~X))
}
p.fn=function(model,newdata){
  b=coef(model)
  return(cbind(1,newdata) %*% b)
}

ArCo=fitArCo(data = list(data.q1), fn = fn, p.fn = p.fn, treated.unity = 1, t0 = 51, VCOV.type = "nw", VCOV.lag = 3, prewhitening.kernel = TRUE)
plot(ArCo,display.fitted=TRUE)
ArCo$delta


## =================================================================================== ##
## ============================Example for data.q2==================================== ##
## =================================================================================== ##


library(glmnet)
set.seed(123)
data("data.q2")
ArCo2=fitArCo(data = data.q2, fn = cv.glmnet, p.fn = predict, treated.unity = 1, t0 = 51, VCOV.type = "nw", VCOV.lag = 3, prewhitening.kernel = TRUE,boot.cf = TRUE,R=100, l=3)
plot(ArCo2,display.fitted=TRUE,confidence.bands = TRUE, alpha=0.05)
ArCo2$delta


## =================================================================================== ##
## ============================Example for data.q2 and estimate_t0==================== ##
## =================================================================================== ##

library(glmnet)
set.seed(123)
data("data.q2")
t0=estimate_t0(data = data.q2, fn = cv.glmnet, p.fn = predict, treated.unity = 1, start = 0.4, end = 0.9)
t0$t0


## =================================================================================== ##
## ============================BIC fn and p.fn======================================== ##
## =================================================================================== ##

fn=function (x, y){
  n=length(y)
  model = glmnet(x = x, y = y)
  coef = coef(model)
  df = model$df
  yhat=cbind(1,x)%*%coef
  residuals = (y- yhat)
  mse = colMeans(residuals^2)
  nvar = df + 1
  bic = n*log(mse)+nvar*log(n)
  selected= which(bic == min(bic))
  return(coef[,selected])
}

p.fn=function(model,newdata){
  return(cbind(1,newdata) %*% model)
}

## =================================================================================== ##
## ============================Example Inflation and tax evasion====================== ##
## =================================================================================== ##

data("inflationNFP")
t0=34
set.seed(123)
ArCoNFP=fitArCo(inflationNFP,fn,p.fn,1,t0,VCOV.type = "nw",VCOV.lag = 2,prewhitening.kernel = TRUE)
plot(ArCoNFP,plot=1,display.fitted = TRUE)
ArCoNFP$delta
ArCoNFP$p.value

# == effects on the CPI == #
FAHsp=inflationNFP$inflationFAH[,1]
real=cumprod(1+FAHsp/100)
cf=cumprod(1+c(FAHsp[1:(t0-1)],ArCoNFP$cf[,1])/100)
fitted=cumprod(1+fitted(ArCoNFP)[,1]/100)

plot(real,type="l",ylab="Y1",xlab="Time")
lines(c(rep(NA,t0-1),tail(cf,length(real)-t0+1)),col=4)
lines(fitted,col=2)
abline(v=t0,col=4,lty=2)
legend("topleft",legend=c("Observed","Fitted","Counterfactual"),col=c(1,2,4),lty=1,lwd=1,cex=1,seg.len = 1,bty="n")


## =================================================================================== ##
## =====================Example comparing with synthetic control====================== ##
## =================================================================================== ##

## =================================================================================== ##
## =====================Artificial data from the Synth package ======================= ##
## =================================================================================== ##


# = Exact code reproduced from the first exaple in the ?synth documentation = #
library(Synth)
data("synth.data")
t0=Sys.time()
dataprep.out<-
  dataprep(
    foo = synth.data,
    predictors = c("X1", "X2", "X3"),
    predictors.op = "mean",
    dependent = "Y",
    unit.variable = "unit.num",
    time.variable = "year",
    special.predictors = list(
      list("Y", 1991, "mean"),
      list("Y", 1985, "mean"),
      list("Y", 1980, "mean")
    ),
    treatment.identifier = 7,
    controls.identifier = c(29, 2, 13, 17, 32, 38),
    time.predictors.prior = c(1984:1989),
    time.optimize.ssr = c(1984:1990),
    unit.names.variable = "name",
    time.plot = 1984:1996
  )

synth.out <- synth(dataprep.out)
Sys.time()-t0

head(synth.data)
# = Adjust the synth.data panel to be compatible with fitArCo = #
ArCodata=panel_to_ArCo_list(synth.data,"year","unit.num",c("Y","X2"))
ArCodata=lapply(ArCodata,function(x)x[as.character(1984:1996),])

# = Run the ArCo = #
t0=Sys.time()
ArCo3=fitArCo(ArCodata,fn,p.fn,treated.unity = 2,t0 = 9)
Sys.time()-t0
ArCo3$delta[1,]
ArCo3$p.value

# = Plot the ArCo and the SC = #
synthcf=dataprep.out$Y0plot%*%synth.out$solution.w
plot(dataprep.out$Y1plot,type="l",ylim=c(80,180),ylab="Y",xlab="Time")
lines(synthcf,col=2)
abline(v=8,col=4,lty=2)
lines(c(fitted(ArCo3)[,1], ArCo3$cf[,1]),col=4)
legend("topleft",legend=c("Y","Synth","ArCo"),col=c(1,2,4),cex = 1,seg.len = 1,bty = "n",lty=1)




## =================================================================================== ##
## =======================  Basque data from the Synth package ======================= ##
## =================================================================================== ##
# = Exact code reproduced from the second exaple in the ?synth documentation = #
# = Comments and printed outputs were omitted = #
data("basque")
t0=Sys.time()
dataprep.out <-
  dataprep(
    foo = basque
    ,predictors= c("school.illit",
                   "school.prim",
                   "school.med",
                   "school.high",
                   "school.post.high"
                   ,"invest"
    )
    ,predictors.op = c("mean")
    ,dependent     = c("gdpcap")
    ,unit.variable = c("regionno")
    ,time.variable = c("year")
    ,special.predictors = list(
      list("gdpcap",1960:1969,c("mean")),                            
      list("sec.agriculture",seq(1961,1969,2),c("mean")),
      list("sec.energy",seq(1961,1969,2),c("mean")),
      list("sec.industry",seq(1961,1969,2),c("mean")),
      list("sec.construction",seq(1961,1969,2),c("mean")),
      list("sec.services.venta",seq(1961,1969,2),c("mean")),
      list("sec.services.nonventa",seq(1961,1969,2),c("mean")),
      list("popdens",1969,c("mean")))
    ,treatment.identifier  = 17
    ,controls.identifier   = c(2:16,18)
    ,time.predictors.prior = c(1964:1969)
    ,time.optimize.ssr     = c(1960:1969)
    ,unit.names.variable   = c("regionname")
    ,time.plot            = c(1955:1997) 
  )

dataprep.out$X1["school.high",] <- 
  dataprep.out$X1["school.high",] + 
  dataprep.out$X1["school.post.high",]
dataprep.out$X1                 <- 
  as.matrix(dataprep.out$X1[
    -which(rownames(dataprep.out$X1)=="school.post.high"),])
dataprep.out$X0["school.high",] <- 
  dataprep.out$X0["school.high",] + 
  dataprep.out$X0["school.post.high",]
dataprep.out$X0                 <- 
  dataprep.out$X0[
    -which(rownames(dataprep.out$X0)=="school.post.high"),]

lowest  <- which(rownames(dataprep.out$X0)=="school.illit")
highest <- which(rownames(dataprep.out$X0)=="school.high")

dataprep.out$X1[lowest:highest,] <- 
  (100 * dataprep.out$X1[lowest:highest,]) /
  sum(dataprep.out$X1[lowest:highest,])
dataprep.out$X0[lowest:highest,] <-  
  100 * scale(dataprep.out$X0[lowest:highest,],
              center=FALSE,
              scale=colSums(dataprep.out$X0[lowest:highest,])
  )

t0=Sys.time()
synth.out <- synth(data.prep.obj = dataprep.out)
Sys.time()-t0
# = Adjust the panel to fitArCo = #
leveldata=panel_to_ArCo_list(basque,"year","regionno",c("gdpcap","invest"))
# = GDP should be differentiated (delta(log)) = #
diffdata=leveldata
diffdata$gdpcap=diff(log(diffdata$gdpcap),1)
diffdata$gdpcap=rbind(NA,diffdata$gdpcap)
# = adjust sample size = #
diffdata=lapply(diffdata,function(x)x[-c(1:10,42,43),])

# = Estimate intervention period = #
t0.4=estimate_t0(diffdata,fn,p.fn,treated.unity =  17)
# = Estimate ArCo = #
ArCo4=fitArCo(diffdata,fn,p.fn,17,t0.4$t0, boot.cf = TRUE, R=500)
plot(ArCo4,plot=1)
ArCo4$delta
ArCo4$p.value

# = Rebuild lvl counterfactual GDP = #
y0=leveldata$gdpcap[10,17] # = First observation
ArCocf=y0*exp(cumsum(c(fitted(ArCo4)[,1],ArCo4$cf[,1])))
y.lvl=leveldata$gdpcap[as.character(1965:1995),17]

# = Extract synth counterfactual and plot = #
synthcf=(dataprep.out$Y0plot%*%synth.out$solution.w)[as.character(1965:1995),]
xax=rownames(diffdata$gdpcap)
plot(xax, y.lvl,type="l",ylab="Y",xlab="Time",ylim=c(0,12),lwd=3)
lines(xax,ArCocf,col=4)
lines(xax,synthcf,col=2)
abline(v=xax[t0.4$t0],col=4,lty=2)
legend("topleft",legend=c("Y","Synth","ArCo"),col=c(1,2,4),lty=1,lwd=c(3,1,1),cex=1,seg.len = 1,bty="n")





