# Copyright 2019 Maciej Bartoszuk and Marek Gagolewski
# Licenced under the GNU GPL 3.0 or later
# Installation:
# install.packages("SimilaR")
# install.packages("magrittr") # needed for an illustrative example
# install.packages(c("DescTools", "nortest")) # needed for a case study
# Load the library:
library(SimilaR)

# 2. Program Dependence Graph

show_ast <- function(x) {
  as.list_deep <- function(x) # convert to a plain list (recursively)
  { if (is.call(x)) lapply(as.list(x), as.list_deep) else x }
  x <- substitute(x)   # expression that generated the argument
  str(as.list_deep(x)) # pretty-print
}

show_ast(d <- sum((x-y)*(x-y)))

show_ast(for(i in 1:5) {
  print("i = ", i)
  if (i %% 2 == 0) print(":)") else print(":(")
})


# 3. Comparing Program Dependence Graphs

clamp1 <- function(x)
{
  if (max(x)-min(x) < 1e-5)
    NULL
  else
    (x - min(x))/(max(x)-min(x))
}

standardize <- function(x)
{
  if (sd(x) < 1e-5)
    NULL
  else
    (x - mean(x))/sd(x)
}

SimilaR_fromTwoFunctions(clamp1, standardize) # 0.79, as in the article
SimilaR_fromTwoFunctions(clamp1, standardize, aggregation = "both") # 0.76 and 0.83, as in the article

# 4. Illustrative Examples

# again the same:
SimilaR_fromTwoFunctions(clamp1, standardize) # 0.79, as in the article
SimilaR_fromTwoFunctions(clamp1, standardize, aggregation = "both") # 0.76 and 0.83, as in the article


library("magrittr")

clamp2 <- function(y)
{
  longName <- y                   # variable duplication
  longName2 <- min
  z <- { sum(longName**2) }       # dead code
  min_y <- longName %>% longName2 # forward-pipe
  max_y <- y %>% max
  max_y_min_y <- max_y-min_y # memoization
  if(!(max_y_min_y >= 1e-5)) # canonicalization of the if statement
  {
    return(NULL)
  }
  ((y - min_y)/max_y_min_y) # tail call to return removed
}

# testing both functions compute the same:
x <- {set.seed(123); rnorm(5)} # example data
stopifnot(clamp1(x) == clamp2(x))

SimilaR_fromTwoFunctions(clamp1, clamp2, aggregation="both") # 1 and 1, as in the article

# Loops:

clamp1_vectorized1 <- function(x) {
  x %>% lapply(function(y) {
    if (max(y)-min(y) < 1e-5) {
      {{{{NULL}}}}
    } else {
      {{{{(y - min(y))/(max(y)-min(y))}}}}
    }
  })
}

clamp1_vectorized2 <- function(x) {
  n <- length(x)
  res <- vector("list", n) # NULLs
  for (i in 1:n) { # assumed n>0
    m <- min(x[[i]])
    mm <- max(x[[i]])-m
    if (mm >= 1e-5)
      res[[i]] <- (x[[i]] - m)/mm
  }
  return(res)
}

SimilaR_fromTwoFunctions(clamp1_vectorized1, clamp1_vectorized2,
                         aggregation="both") # 0.78 and 0.92, as in the article

# 5. A Case Study

library("SimilaR")
dir_output <- tempfile("dir")
dir.create(dir_output)

for (pkg in c("DescTools", "nortest"))
{
  library(pkg, character.only=TRUE)  # attach the package
  env <- as.environment(paste0("package:", pkg)) # package's environment
  fun_names <- ls(envir=env)  # list of all exported objects
  file_text <- character(0) # the to-be code-base (1 function == 1 string)
  for (fun in fun_names)
  {
    f <- get(fun, env)   # get function object
    if (!is.function(f)) next
    
    f_char <-  paste0(deparse(f), collapse="\n") # extract source code
    file_text[length(file_text)+1] <- sprintf("`%s`<-%s", fun, f_char)
  }
  file_name <- file.path(dir_output, paste0(pkg, ".R"))
  writeLines(file_text, file_name) # write source file
  cat(sprintf("%s: %d functions processed.\n", pkg, length(file_text)))
}

time0 <- Sys.time()
results <- SimilaR_fromDirectory(dir_output,
                                 fileTypes="file", aggregation="both")
print(Sys.time()-time0)

head(results, 10)

