library(tidycode)
library(dplyr)

# A pipeline is an ordinary call. First, run it as usual ...
starwars %>% select(height, mass)

# ... then capture the same pipeline unevaluated and check that the captured
# object is a call.
is.call(quote(starwars %>% select(height, mass)))

# The same pipeline once more, with `%>%` invoked in prefix (function) form.
`%>%`(starwars, select(height, mass))

## install.packages("matahari")

library(matahari)
# Session 1: the three top-level expressions between dance_start() and
# dance_stop() are the ones this demo intends to have logged.
# NOTE(review): matahari logging is presumably driven by a top-level task
# callback, so this likely only records when run interactively -- confirm.
dance_start()
1 + 2
"here is some text"
sum(1:10)
dance_stop()
# Print the log collected so far.
dance_tbl()

# Pull just the `expr` element (column) out of the log.
dance_tbl()[["expr"]]

# Session 2: same expressions, this time started with value = TRUE
# (presumably so each expression's result is stored too; the `value` column
# of `tbl` is what the code further down inspects).
# NOTE(review): dance_remove() is never called between the two sessions, so
# `tbl` may also contain the rows logged by session 1 -- confirm.
dance_start(value = TRUE)
1 + 2
"here is some text"
sum(1:10)
dance_stop()
# Keep the log in `tbl` for the filtering step that follows.
tbl <- dance_tbl()


library(dplyr)
library(purrr)

# Flag every logged row by whether its recorded value is numeric, then keep
# only the flagged rows. The helper column `numeric_output` stays in the
# result.
t_numeric <- filter(
  mutate(tbl, numeric_output = map_lgl(value, is.numeric)),
  numeric_output
)

t_numeric


# Two inputs for dance_recital(): a script file shipped inside the matahari
# package, and an inline snippet that mixes plain values with an error, a
# warning, a message and console output.
code_file <- system.file("test", "sample_code.R", package = "matahari")
code_string <- '
4 + 4
"wow!"
mean(1:10)
stop("Error!")
warning("Warning!")
message("Hello?")
cat("Welcome!")
'

# Run the recital on the file first, then on the inline snippet.
dance_recital(code_file)
dance_recital(code_string)

## install.packages("tidycode")
library(tidycode)
# Look up one bundled example script by name, then call the helper with no
# argument (presumably this lists the available examples -- confirm).
tidycode_example("example_analysis.R")
tidycode_example()

# Read two of the bundled example scripts into a single data frame.
df <- c("example_analysis.R", "example_plot.R") %>%
  tidycode_example() %>%
  read_rfiles()
df

# Expand the `expr` column into one row per call; keep the result as `u`.
u <- df %>% unnest_calls(expr)
u

# The same expansion written as a direct function call.
unnest_calls(df, expr)

# Only the function name and argument columns of the expanded table.
select(unnest_calls(df, expr), func, args)


# All joins below match on the function name. The key is spelled out with
# `by = "func"` so the result does not depend on which column names the two
# tables happen to share, and so dplyr does not emit a "Joining, by = ..."
# message on every call.

# Default classifications, keeping the lexicon and score columns.
u %>%
  inner_join(get_classifications(), by = "func") %>%
  select(func, classification, lexicon, score)

# Restrict to the "crowdsource" lexicon.
u %>%
  inner_join(get_classifications("crowdsource"), by = "func") %>%
  select(func, classification, score)

# "crowdsource" lexicon again, requested with include_duplicates = FALSE.
u %>%
  inner_join(
    get_classifications("crowdsource", include_duplicates = FALSE),
    by = "func"
  ) %>%
  select(func, classification)

# Same as above, additionally dropping every function listed by
# get_stopfuncs().
u %>%
  inner_join(
    get_classifications("crowdsource", include_duplicates = FALSE),
    by = "func"
  ) %>%
  anti_join(get_stopfuncs(), by = "func") %>%
  select(func, classification)

library(tidyverse)
library(tidycode)
## Load the dataset; the .Rda file is expected to define an object called `df`.
load("data/df_phackathon.Rda")

# One row per call found in the `expr` column.
tbl <- df %>%
  unnest_calls(expr)

# Drop the functions listed by get_stopfuncs(), then attach the "crowdsource"
# classification of each remaining function. Both joins name their key
# explicitly: the columns of the loaded `df` are not under this script's
# control, and a natural join would silently start matching on any other
# column name the tables happened to share.
classification_tbl <- tbl %>%
  anti_join(get_stopfuncs(), by = "func") %>%
  inner_join(
    get_classifications("crowdsource", include_duplicates = FALSE),
    by = "func"
  )

# Per id: the share of classified calls that falls into each classification.
# Then, per classification: the mean of those shares expressed in percent,
# listed from largest to smallest.
classification_tbl %>%
  group_by(id, classification) %>%
  summarise(n = n()) %>%
  # summarise() dropped the `classification` grouping level, so the
  # denominator below is the total number of calls within each id.
  mutate(pct = n / sum(n)) %>%
  group_by(classification) %>%
  summarise(`Average percent` = 100 * mean(pct)) %>%
  arrange(desc(`Average percent`))

# Number of calls per (function, classification) pair, most frequent first.
func_counts <- classification_tbl %>%
  count(func, classification, sort = TRUE) %>%
  ungroup()

func_counts


# Horizontal bar chart of the five most-called functions (ties included) in
# four selected classifications, one facet per classification.
func_counts %>%
  filter(
    classification %in%
      c("data cleaning", "exploratory", "modeling", "visualization")
  ) %>%
  group_by(classification) %>%
  # Rank explicitly by the count column; without `wt`, top_n() falls back to
  # whichever column happens to be last and prints a "Selecting by" message.
  top_n(5, n) %>%
  ungroup() %>%
  # Order the bars by frequency.
  mutate(func = reorder(func, n)) %>%
  ggplot(aes(func, n, fill = classification)) +
  theme_bw() +
  geom_col(show.legend = FALSE) +
  facet_wrap(~classification, scales = "free_y") +
  # name = NULL is the documented way to omit an axis title.
  scale_x_discrete(name = NULL) +
  scale_y_continuous("Number of function calls in each classification") +
  coord_flip()

library(wordcloud)

# Word cloud of function names sized by call count. Each word's colour is
# picked from the 9-colour "Set1" palette by the factor code of its
# classification. local() keeps the helper objects out of the workspace.
local({
  func_freq <- count(classification_tbl, func, classification)
  colour_by_class <- brewer.pal(9, "Set1")[factor(func_freq$classification)]

  wordcloud(
    func_freq$func,
    func_freq$n,
    colors = colour_by_class,
    random.order = FALSE,
    # Colours are supplied per word, in the same order as the words.
    ordered.colors = TRUE
  )
})

# Stacked bars: for each `analysis_job` group, the average share of a
# participant's classified calls that falls into each classification.
classification_tbl %>%
  group_by(id, analysis_job, classification) %>%
  summarise(n = n()) %>%
  # Still grouped by (id, analysis_job): share of each classification within
  # one participant's calls. These shares add up to 1 per participant.
  mutate(pct = n / sum(n)) %>%
  group_by(analysis_job, classification) %>%
  # Previously this step was summarise(n = n()), which threw `pct` away and
  # merely counted participants, so the plotted value was not the "average
  # percent" the axis label promises. Summing the shares and dividing by the
  # group total (the number of participants in the group, since every
  # participant contributes shares summing to 1) yields the mean share, with
  # participants who never used a classification counted as 0.
  summarise(total_pct = sum(pct)) %>%
  mutate(avg_pct = total_pct / sum(total_pct)) %>%
  ggplot(aes(x = analysis_job, y = avg_pct, fill = classification)) +
  geom_col() +
  scale_y_continuous("Average percent", labels = scales::percent) +
  scale_x_discrete("Participant conducts analyses as part of their job")

library(tidyverse)
library(gh)
library(tidycode)

# dplyr_code <- gh("/repos/tidyverse/dplyr/contents/R") %>%
#   purrr::map("download_url") %>%
#   read_rfiles()
# 
# datatable_code <- gh("/repos/Rdatatable/data.table/contents/R") %>%
#   purrr::map("download_url") %>%
#   read_rfiles()

# The two .Rda files are expected to provide `dplyr_code` and
# `datatable_code`, which are used right below.
load("data/dplyr_code.Rda")
load("data/datatable_code.Rda")

# Stack both tables, recording the source package in `pkg`, and discard rows
# whose `expr` entry is NULL or a character value.
pkg_data <- bind_rows(
  dplyr = dplyr_code,
  datatable = datatable_code,
  .id = "pkg"
) %>%
  filter(!(map_lgl(expr, is.null) | map_lgl(expr, is.character)))


# Calls per (package, function) pair, most frequent first.
func_counts <- count(unnest_calls(pkg_data, expr), pkg, func, sort = TRUE)

func_counts

# Ten most-called functions per package (ties included), sorted by package and
# count. `i` is a running row index that gives each bar its own position.
top_funcs <- func_counts %>%
  group_by(pkg) %>%
  # Rank explicitly by the count column; without `wt`, top_n() falls back to
  # whichever column happens to be last and prints a "Selecting by" message.
  top_n(10, n) %>%
  ungroup() %>%
  arrange(pkg, n) %>%
  mutate(i = row_number())

# One facet per package. The continuous axis is positioned by `i` and
# relabelled with the function names, so a function that appears in both
# packages still gets a separate bar in each facet.
ggplot(top_funcs, aes(i, n, fill = pkg)) +
  theme_bw() +
  geom_col(show.legend = FALSE) +
  facet_wrap(~pkg, scales = "free") +
  scale_x_continuous(
    # name = NULL is the documented way to omit an axis title.
    name = NULL,
    breaks = top_funcs$i,
    labels = top_funcs$func,
    expand = c(0, 0)
  ) +
  coord_flip()

