# Purpose: Script for manuscript code for journal reviewer.
#
# Notes:
#   1) This code changes the locale setting.
#   2) Package 'LBE' is needed for 'PhViD':
#          > source("https://bioconductor.org/biocLite.R")
#          > biocLite("LBE")
#   3) The data set 'dat_tidy' created in "CAERS data example" and used in
#        "Appendix A" might change slightly over time. We felt it was useful
#        to show the user how to obtain and munge the current data. To minimize
#        changes, we only keep reports before the year 2017.
#   4) The static data set 'caers' (included in openEBGM) is used for plotting.
#
# Caution: This script will take over an hour to run (the Appendices are
#            especially time-consuming). Also, some of the values in Appendix B
#            will require nearly 48GB of free RAM to calculate, so please
#            make sure you have enough RAM before running that code.

library(openEBGM)  #v0.2.0
library(ggplot2)
library(tidyr)
library(PhViD)  #v1.0.8

sessionInfo()

## Introduction ----------------------------------------------------------------
# Model-based scores

#Figure 1
num_pts <- 500
N <- 1:num_pts
E <- N * 0.1  #to make RR = 10
RR <- N / E
dat <- data.frame(N = N, E = E, RR = RR)

theta1 <- c(alpha1 = .01, beta1 = .1, alpha2 = 5, beta2 = 20, P = .5)
dat$qn1 <- Qn(theta1, N = dat$N, E = dat$E)
dat$EBGM1 <- ebgm(theta1, N = dat$N, E = dat$E, qn = dat$qn1, digits = 3)

theta2 <- c(alpha1 = 0.1, beta1 = 1.2, alpha2 = 0.4, beta2 = 8, P = .05)
dat$qn2 <- Qn(theta2, N = dat$N, E = dat$E)
dat$EBGM2 <- ebgm(theta2, N = dat$N, E = dat$E, qn = dat$qn2, digits = 3)

theta3 <- c(alpha1 = 2, beta1 = 2, alpha2 = 5, beta2 = 5, P = .8)
dat$qn3 <- Qn(theta3, N = dat$N, E = dat$E)
dat$EBGM3 <- ebgm(theta3, N = dat$N, E = dat$E, qn = dat$qn3, digits = 3)

theta4 <- c(alpha1 = 5, beta1 = 5, alpha2 = 5, beta2 = 5, P = .5)
dat$qn4 <- Qn(theta4, N = dat$N, E = dat$E)
dat$EBGM4 <- ebgm(theta4, N = dat$N, E = dat$E, qn = dat$qn4, digits = 3)

dat_long <- gather(dat, key = theta, value = EBGM, EBGM1, EBGM2, EBGM3, EBGM4)

breaks <- c(1, seq(50, 500, by = 50))

p1 <- ggplot(dat_long, aes(x = N, y = EBGM, group = theta, color = theta)) +
  geom_line(size = 1.5) +
  geom_hline(yintercept = 10, color = "black", size = 2, linetype = "dashed") +
  annotate("text", x = 100, y = 10.7, label = "RR = 10", size = 6.5) +
  scale_x_discrete(breaks = breaks, labels = breaks, limits = breaks) +
  scale_y_continuous(limits = c(0, 11), breaks = seq(0, 10, by = 2)) +
  scale_color_manual(values = c("green", "blue", "red", "orange"),
                     labels = c(expression(theta[1]), expression(theta[2]),
                                expression(theta[3]), expression(theta[4]))) +
  labs(x = "Actual Count (N)", y = "EBGM", color = "Hyper.") +
  theme_classic() +
  theme(
    plot.margin  = unit(c(0.5, 1, .5, .5), "cm"),
    axis.line    = element_line(size = 1),
    axis.title.x = element_text(size = 15, hjust = 0.5, margin = margin(14, 0, 0, 0)),
    axis.title.y = element_text(size = 15, hjust = 0.5),
    axis.text    = element_text(size = 12),
    legend.background  = element_rect(size = 0.5, linetype = "solid",
                                      color = "black", fill = "white"),
    legend.title       = element_text(face = "italic", size = 16),
    legend.text        = element_text(size = 14, face = "italic", hjust = 2),
    legend.position    = c(0.6, 0.35),
    legend.title.align = 0.5,
    legend.text.align  = -0.25,
    legend.key.size    = unit(0.75, units = "cm"),
    legend.key.height  = unit(0.8, units = "cm"),
    legend.margin      = margin(.4, .4, .4, .4, "cm")
  )
p1
#ggsave("asymptoticBehavior.pdf", p1, width = 8, height = 4)
rm(p1, dat, dat_long, E, N, RR); gc()


## Data preparation ------------------------------------------------------------

# CAERS data example
Sys.setlocale(locale = "C") #locale can affect sorting order, etc.
site <- "https://www.fda.gov/downloads/Food/ComplianceEnforcement/UCM494018.csv"
dat <- read.csv(site, stringsAsFactors = FALSE, strip.white = TRUE)
dat$yr <- dat$RA_CAERS.Created.Date
dat$yr <- substr(dat$yr, start = nchar(dat$yr) - 3, stop = nchar(dat$yr))
dat$yr <- as.integer(dat$yr)
dat <- dat[dat$PRI_FDA.Industry.Code == 54 & dat$yr < 2017, ]

dat$var1 <- dat$PRI_Reported.Brand.Product.Name
dat$var2 <- dat$SYM_One.Row.Coded.Symptoms
dat$id <- dat$RA_Report..
dat$strat_gen <- dat$CI_Gender
vars <- c("id", "var1", "var2", "strat_gen")
dat <- dat[, vars]

dat$strat_gen <- ifelse(dat$strat_gen %in% c("Female", "Male"),
                        dat$strat_gen, "unknown")

dat2 <- dat[!dat$var1 %in% tools::showNonASCII(dat$var1), ]
head(dat2, 3)

dat_tidy <- tidyr::separate_rows(dat2, var2, sep = ", ")
dat_tidy <- dat_tidy[, vars]
head(dat_tidy, 3)


## Counts and simple disproportionality measures -------------------------------

# Data processing example
data("caers") #small subset of publicly available CAERS data
head(caers, 3)

processRaw(caers) %>% head(3)
processRaw(caers, stratify = TRUE) %>% head(3)
processRaw(caers, stratify = TRUE, max_cats = 2) %>% head(3)


## Hyperparameter estimation ---------------------------------------------------

# Hyperparameter estimation example
proc <- processRaw(caers)
    #Figure 2
    set.seed(467543)
    edist <- proc[proc$N < 6, ]

    p2 <- ggplot(data = edist, aes(x = N, y = E), group = N) +
      geom_jitter(position = position_jitter(0.2), alpha = .5) +
      labs(x = "Actual Count (N)", y = "Expected Count (E)") +
      theme_classic() +
      theme(
        axis.line    = element_line(size = 0.5),
        axis.title.x = element_text(size = 18, hjust = 0.5,
                                    margin = margin(14, 0, 0, 0)),
        axis.title.y = element_text(size = 18, hjust = 0.5),
        axis.text    = element_text(size = 12)
      )
    p2
    #ggsave("expectedCounts.png", p2, width = 8, height = 4, dpi = 600)
squashed <- squashData(proc)
squashed <- squashData(squashed, count = 2, bin_size = 10)
head(squashed, 2); tail(squashed, 2)

theta_init1 <- c(alpha1 = 0.2, beta1 = 0.1, alpha2 = 2, beta2 = 4, p = 1/3)
stats::nlminb(start = theta_init1, objective = negLLsquash,
              ni = squashed$N, ei = squashed$E, wi = squashed$weight)$par

theta_init2 <- data.frame(
  alpha1 = c(0.2, 0.1, 0.5),
  beta1  = c(0.1, 0.1, 0.5),
  alpha2 = c(2, 10, 5),
  beta2  = c(4, 10, 5),
  p      = c(1/3, 0.2, 0.5)
)
exploreHypers(squashed, theta_init = theta_init2)
(theta_hat <- autoHyper(squashed, theta_init = theta_init2))


## EB disproportionality scores ------------------------------------------------

# EB scores example
theta_hats <- theta_hat$estimates
qn <- Qn(theta_hats, N = proc$N, E = proc$E)
proc$EBGM <- ebgm(theta_hats, N = proc$N, E = proc$E, qn = qn)
proc$QUANT_05 <- quantBisect(5, theta_hat = theta_hats,
                             N = proc$N, E = proc$E, qn = qn)
proc$QUANT_95 <- quantBisect(95, theta_hat = theta_hats,
                             N = proc$N, E = proc$E, qn = qn)
head(proc, 3)


## Object-oriented features ----------------------------------------------------

# Object creation
proc2 <- processRaw(caers)
ebScores(proc2, hyper_estimate = theta_hat, quantiles = 10)$data %>% head(3)

obj <- ebScores(proc2, hyper_estimate = theta_hat, quantiles = c(10, 90))
head(obj$data, 3)

ebScores(proc2, hyper_estimate = theta_hat, quantiles = NULL)$data %>% head(3)

# Simple descriptive analysis
obj <- ebScores(proc2, hyper_estimate = theta_hat, quantiles = c(10, 90))
obj
summary(obj)

# Bar plots
plot(obj)  #Figure 3
plot(obj, event = "CHOKING")  #Figure 4

# Histograms
plot(obj, plot.type = "histogram")  #Figure 5

# Shrinkage plots
plot(obj, plot.type = "shrinkage")  #Figure 6


## Computational efficiency ----------------------------------------------------

# Efficient processing
proc_zeroes <- processRaw(caers, zeroes = TRUE)
proc_no_zeroes <- processRaw(caers)
squash_zeroes <- squashData(proc_zeroes, count = 0)
squash_no_zeroes <- squashData(proc_no_zeroes)
theta_init <- data.frame(alpha1 = c(0.2, 0.1),
                         beta1 = c(0.1, 0.1),
                         alpha2 = c(2, 10),
                         beta2 = c(4, 10),
                         p = c(1/3, 0.2))
system.time(hyper_zeroes <- autoHyper(squash_zeroes,
                                      theta_init = theta_init, squashed = TRUE,
                                      zeroes = TRUE, N_star = NULL,
                                      max_pts = nrow(squash_zeroes)))
system.time(hyper_no_zeroes <- autoHyper(squash_no_zeroes,
                                         theta_init = theta_init,
                                         squashed = TRUE, zeroes = FALSE,
                                         N_star = 1))
qn_zeroes <- Qn(theta_hat = hyper_zeroes$estimates,
                N = proc_no_zeroes$N, E = proc_no_zeroes$E)
ebgm_zeroes <- ebgm(theta_hat = hyper_zeroes$estimates,
                    N = proc_no_zeroes$N, E = proc_no_zeroes$E, qn = qn_zeroes)
qn_no_zeroes <- Qn(theta_hat = hyper_no_zeroes$estimates,
                   N = proc_no_zeroes$N, E = proc_no_zeroes$E)
ebgm_no_zeroes <- ebgm(theta_hat = hyper_no_zeroes$estimates,
                       N = proc_no_zeroes$N, E = proc_no_zeroes$E, qn = qn_no_zeroes)
hyper_zeroes$estimates
hyper_no_zeroes$estimates

system.time(hyper_squash <- autoHyper(data = squash_no_zeroes, theta_init = theta_init,
                                      squashed = TRUE, zeroes = FALSE, N_star = 1))
system.time(hyper_no_squash <- autoHyper(data = proc_no_zeroes, theta_init = theta_init,
                                         squashed = FALSE, zeroes = FALSE, N_star = 1))
hyper_squash$estimates
hyper_no_squash$estimates

#Figure 7
N <- proc_no_zeroes$N
N_fuzzy <- ifelse(N < 6, N, "6 +")
z_nz_df <- data.frame(ebgm_mean = (ebgm_zeroes + ebgm_no_zeroes) / 2,
                      ebgm_diff = ebgm_zeroes - ebgm_no_zeroes)
p3 <- ggplot(data = z_nz_df, aes(x = ebgm_mean, y = ebgm_diff, col = N_fuzzy))
p3 <- p3 + geom_hline(yintercept = 0, linetype = 2) + geom_point() +
  guides(color = guide_legend("N")) +
  xlab("Average of Estimates") + ylab("Difference of Estimates") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.title.x = element_text(face = "bold"),
        axis.title.y = element_text(face = "bold"),
        panel.border = element_blank(),
        axis.line = element_line(color = "black"))
p3
#ggsave("blandaltman_zeroes.pdf", p3, width = 8, height = 5)

#Figure 8
qn_nz_s <- Qn(theta_hat = hyper_squash$estimates, N = proc_no_zeroes$N, E = proc_no_zeroes$E)
qn_nz_ns <- Qn(theta_hat = hyper_no_squash$estimates, N = proc_no_zeroes$N, E = proc_no_zeroes$E)
ebgm_nz_s <- ebgm(theta_hat = hyper_squash$estimates, N = proc_no_zeroes$N,
                  E = proc_no_zeroes$E, qn = qn_nz_s, digits = 5)
ebgm_nz_ns <- ebgm(theta_hat = hyper_no_squash$estimates, N = proc_no_zeroes$N,
                   E = proc_no_zeroes$E, qn = qn_nz_ns, digits = 5)
sq_nsq_df <- data.frame(ebgm_mean = (ebgm_nz_s + ebgm_nz_ns) / 2,
                        ebgm_diff = ebgm_nz_s - ebgm_nz_ns)
p4 <- ggplot(data = sq_nsq_df, aes(x = ebgm_mean, y = ebgm_diff, col = N_fuzzy))
p4 <- p4 +  geom_hline(yintercept = 0, linetype = 2) + geom_point() +
  guides(color = guide_legend("N")) +
  xlab("Average of Estimates") + ylab("Difference of Estimates") +
  theme_bw() +
  theme(panel.grid = element_blank(),
        axis.title.x = element_text(face = "bold"),
        axis.title.y = element_text(face = "bold"),
        panel.border = element_blank(),
        axis.line = element_line(color = "black"))
p4
#ggsave("bland_altman_squash_nosquash.pdf", p4, width = 8, height = 5)
rm(caers, dat, dat2, edist, proc, proc_no_zeroes, proc_zeroes, proc2, sq_nsq_df)
rm(squash_no_zeroes, squash_zeroes, squashed, z_nz_df, obj, p2, p3, p4); gc()


## Conclusion and discussion ---------------------------------------------------


## Appendix A ------------------------------------------------------------------

dat_tidy$id <- 1:nrow(dat_tidy) #since PhViD cannot count unique reports
counts <- processRaw(dat_tidy)
nrow(counts) #number of points/var1-var2 pairs
theta_init <- c(alpha1 = 0.2, beta1 = 0.06, alpha2 = 1.4, beta2 = 1.8, P = 0.1)

#Start with the PhViD package
system.time({
  dat_phvid <- as.PhViD(counts[, 1:3])
  results_phvid <- GPS(dat_phvid, RANKSTAT = 3, TRONC = TRUE, PRIOR.INIT = theta_init)
})

results_phvid <- results_phvid$ALLSIGNALS[, 1:6]
results_phvid <- results_phvid[order(results_phvid$drug, results_phvid$event), ]
results_phvid$EBGM <- round(2 ^ results_phvid[, 'post E(Lambda)'], 3)
row.names(results_phvid) <- NULL
head(results_phvid, 3)

rm(dat_tidy, results_phvid, dat_phvid, qn, qn_no_zeroes, qn_nz_ns, qn_nz_s)
rm(ebgm_no_zeroes, ebgm_nz_ns, ebgm_nz_s, ebgm_zeroes); gc()

#Now for the openEBGM package
theta_init_df <- t(data.frame(theta_init))
system.time({
  theta1 <- exploreHypers(counts, theta_init = theta_init_df, squashed = FALSE,
                          method = "nlm", max_pts = nrow(counts))
  theta1 <- as.numeric(theta1[1, 2:6])
  results_open1 <- ebScores(counts, list(estimates = theta1), quantiles = NULL)
})

dat_open1 <- results_open1$data
head(dat_open1, 3)

#Now with data squashing
system.time({
  squashed <- squashData(counts, bin_size = 100)
  squashed <- squashData(squashed, count = 2, bin_size = 10)
  theta2 <- exploreHypers(squashed, theta_init = theta_init_df, method = "nlm")
  theta2 <- as.numeric(theta2[1, 2:6])
  results_open2 <- ebScores(counts, list(estimates = theta2), quantiles = NULL)
})

dat_open2 <- results_open2$data
head(dat_open2, 3)

rm(counts)
rm(dat_open1, dat_open2, squashed, N, N_fuzzy, results_open1, results_open2); gc()
#Note: You might need to restart your R session to clear the RAM used by PhVid.


## Appendix B ------------------------------------------------------------------
# Caution: Some values in this table will require nearly 48GB of free RAM to
#            calculate (namely the "with zeroes" data sets with 300k rows in
#            the raw data).

site <- "https://www.fda.gov/downloads/Food/ComplianceEnforcement/UCM494018.csv"
dat  <- read.csv(site, stringsAsFactors = FALSE, strip.white = TRUE)
dat$yr <- dat$RA_CAERS.Created.Date
dat$yr <- substr(dat$yr, start = nchar(dat$yr) - 3, stop = nchar(dat$yr))
dat$yr <- as.integer(dat$yr)
dat <- dat[dat$yr < 2017, ]  #using all industry codes

dat$var1 <- dat$PRI_Reported.Brand.Product.Name
dat$var2 <- dat$SYM_One.Row.Coded.Symptoms
dat$id   <- dat$RA_Report..
dat$strat_gen <- dat$CI_Gender
dat$strat_gen <- ifelse(dat$strat_gen %in% c("Female", "Male"),
                        dat$strat_gen, "unknown")
vars <- c("id", "var1", "var2", "strat_gen")
dat  <- dat[, vars]
dat <- dat[!dat$var1 %in% tools::showNonASCII(dat$var1), ]
dat_tidy <- tidyr::separate_rows(dat, var2, sep = ", ")
dat_tidy <- dat_tidy[dat_tidy$var2 != "", ]
nrow(dat_tidy)

dat_50k  <- dat_tidy[1:50000, ]
dat_100k <- dat_tidy[1:100000, ]
dat_150k <- dat_tidy[1:150000, ]
dat_200k <- dat_tidy[1:200000, ]
dat_250k <- dat_tidy[1:250000, ]
dat_300k <- dat_tidy[1:300000, ]

#without zeroes
#50K pts
system.time({
  dat_50k_unstr_nozeroes <- processRaw(dat_50k, stratify = FALSE, zeroes = FALSE)
})
nrow(dat_50k_unstr_nozeroes)
rm(dat_50k_unstr_nozeroes)

system.time({
  dat_50k_str_nozeroes <- processRaw(dat_50k, stratify = TRUE, zeroes = FALSE)
})
nrow(dat_50k_str_nozeroes)
rm(dat_50k_str_nozeroes); gc()

#100K
system.time({
  dat_100k_unstr_nozeroes <- processRaw(dat_100k, stratify = FALSE, zeroes = FALSE)
})
nrow(dat_100k_unstr_nozeroes)
rm(dat_100k_unstr_nozeroes)

system.time({
  dat_100k_str_nozeroes <- processRaw(dat_100k, stratify = TRUE, zeroes = FALSE)
})
nrow(dat_100k_str_nozeroes)
rm(dat_100k_str_nozeroes); gc()

#150K
system.time({
  dat_150k_unstr_nozeroes <- processRaw(dat_150k, stratify = FALSE, zeroes = FALSE)
})
nrow(dat_150k_unstr_nozeroes)
rm(dat_150k_unstr_nozeroes)

system.time({
  dat_150k_str_nozeroes <- processRaw(dat_150k, stratify = TRUE, zeroes = FALSE)
})
nrow(dat_150k_str_nozeroes)
rm(dat_150k_str_nozeroes); gc()

#200K
system.time({
  dat_200k_unstr_nozeroes <- processRaw(dat_200k, stratify = FALSE, zeroes = FALSE)
})
nrow(dat_200k_unstr_nozeroes)
rm(dat_200k_unstr_nozeroes)

system.time({
  dat_200k_str_nozeroes <- processRaw(dat_200k, stratify = TRUE, zeroes = FALSE)
})
nrow(dat_200k_str_nozeroes)
rm(dat_200k_str_nozeroes); gc()

#250K
system.time({
  dat_250k_unstr_nozeroes <- processRaw(dat_250k, stratify = FALSE, zeroes = FALSE)
})
nrow(dat_250k_unstr_nozeroes)
rm(dat_250k_unstr_nozeroes); gc()

system.time({
  dat_250k_str_nozeroes <- processRaw(dat_250k, stratify = TRUE, zeroes = FALSE)
})
nrow(dat_250k_str_nozeroes)
rm(dat_250k_str_nozeroes); gc()

#300K
system.time({
  dat_300k_unstr_nozeroes <- processRaw(dat_300k, stratify = FALSE, zeroes = FALSE)
})
nrow(dat_300k_unstr_nozeroes)
rm(dat_300k_unstr_nozeroes); gc()

system.time({
  dat_300k_str_nozeroes <- processRaw(dat_300k, stratify = TRUE, zeroes = FALSE)
})
nrow(dat_300k_str_nozeroes)
rm(dat_300k_str_nozeroes); gc()

#with zeroes
#50K pts
system.time({
  dat_50k_unstr_zeroes <- processRaw(dat_50k, stratify = FALSE, zeroes = TRUE)
})
nrow(dat_50k_unstr_zeroes)
rm(dat_50k_unstr_zeroes); gc()

system.time({
  dat_50k_str_zeroes <- processRaw(dat_50k, stratify = TRUE, zeroes = TRUE)
})
nrow(dat_50k_str_zeroes)
rm(dat_50k_str_zeroes); gc()

#100K
system.time({
  dat_100k_unstr_zeroes <- processRaw(dat_100k, stratify = FALSE, zeroes = TRUE)
})
nrow(dat_100k_unstr_zeroes)
rm(dat_100k_unstr_zeroes); gc()

system.time({
  dat_100k_str_zeroes <- processRaw(dat_100k, stratify = TRUE, zeroes = TRUE)
})
nrow(dat_100k_str_zeroes)
rm(dat_100k_str_zeroes); gc()

#150K
system.time({
  dat_150k_unstr_zeroes <- processRaw(dat_150k, stratify = FALSE, zeroes = TRUE)
})
nrow(dat_150k_unstr_zeroes)
rm(dat_150k_unstr_zeroes); gc()

system.time({
  dat_150k_str_zeroes <- processRaw(dat_150k, stratify = TRUE, zeroes = TRUE)
})
nrow(dat_150k_str_zeroes)
rm(dat_150k_str_zeroes); gc()

#200K
system.time({
  dat_200k_unstr_zeroes <- processRaw(dat_200k, stratify = FALSE, zeroes = TRUE)
})
nrow(dat_200k_unstr_zeroes)
rm(dat_200k_unstr_zeroes); gc()

system.time({
  dat_200k_str_zeroes <- processRaw(dat_200k, stratify = TRUE, zeroes = TRUE)
})
nrow(dat_200k_str_zeroes)
rm(dat_200k_str_zeroes); gc()

#250K
system.time({
  dat_250k_unstr_zeroes <- processRaw(dat_250k, stratify = FALSE, zeroes = TRUE)
})
nrow(dat_250k_unstr_zeroes)
rm(dat_250k_unstr_zeroes); gc()

system.time({
  dat_250k_str_zeroes <- processRaw(dat_250k, stratify = TRUE, zeroes = TRUE)
})
nrow(dat_250k_str_zeroes)
rm(dat_250k_str_zeroes); gc()

#300K
system.time({
  dat_300k_unstr_zeroes <- processRaw(dat_300k, stratify = FALSE, zeroes = TRUE)
})
nrow(dat_300k_unstr_zeroes)
rm(dat_300k_unstr_zeroes); gc()

system.time({
  dat_300k_str_zeroes <- processRaw(dat_300k, stratify = TRUE, zeroes = TRUE)
})
nrow(dat_300k_str_zeroes)
rm(dat_300k_str_zeroes); gc()


## Appendix C ------------------------------------------------------------------

nrow(dat_tidy)  #rows in raw data
system.time({
  counts <- processRaw(dat_tidy, zeroes = TRUE)
})
nrow(counts)  #number of points/var1-var2 pairs in processed data

system.time({
  squashed <- squashData(counts, count = 0, bin_size = 50000, keep_bins = 0)
})
nrow(squashed)

system.time({
  squashed <- squashData(squashed, bin_size = 500, keep_bins = 1)
  squashed <- squashData(squashed, count = 2, bin_size = 100, keep_bins = 1)
  squashed <- squashData(squashed, count = 3, bin_size = 50, keep_bins = 1)
  squashed <- squashData(squashed, count = 4, bin_size = 25, keep_bins = 1)
  squashed <- squashData(squashed, count = 5, bin_size = 10, keep_bins = 1)
  squashed <- squashData(squashed, count = 6, bin_size = 10, keep_bins = 1)
})
nrow(squashed)

theta_init <- c(alpha1 = 0.2, beta1 = 0.06, alpha2 = 1.4, beta2 = 1.8, P = 0.1)
theta_init_df <- t(data.frame(theta_init))

system.time({
  theta_hat <- exploreHypers(squashed, theta_init = theta_init_df,
                             squashed = TRUE, zeroes = TRUE, N_star = NULL,
                             method = "nlminb")
  theta_hat <- as.numeric(theta_hat[1, 2:6])
})

system.time({
  counts_sans0 <- counts[counts$N != 0, ]  #do not need EB scores for zero counts
  results <- ebScores(counts_sans0, list(estimates = theta_hat), quantiles = NULL)
})
nrow(results$data)
rm(counts, counts_sans0, squashed, results); gc()

