dvanic · October 29, 2020 04:45
diff --git a/gistfile1.txt b/gistfile1.txt
 #' ---
 #' title: "Summary tables in R"
 #' subtitle: "The many ways to make sense of your data"
 #' author: "Darya Vanichkina"
 #' date: "`r lubridate::today()`"
 #' output:
 #'   xaringan::moon_reader:
 #'     css: ["default", "assets/sydney-fonts.css", "assets/sydney.css"]
 #'     self_contained: false # if true, fonts will be stored locally
 #'     seal: false # show a title slide with YAML information
 #'     includes:
 #'       in_header: "assets/mathjax-equation-numbers.html"
 #'     nature:
 #'       beforeInit: ["assets/remark-zoom.js", "https://platform.twitter.com/widgets.js"]
 #'       highlightStyle: github
 #'       highlightLines: true
 #'       countIncrementalSlides: false
 #'       ratio: '16:9' # alternatives '16:9' or '4:3' or others e.g. 13:9
 #'       navigation:
 #'         scroll: false # disable slide transitions by scrolling
 #' ---
 #' 

 #' 
 #' class: title-slide
 #' background-image: url("assets/USydLogo-white.svg"), url("assets/title-image1.jpg")
 #' background-position: 10% 90%, 100% 50%
 #' background-size: 160px, 40% 100%
 #' background-color: #0148A4
 #' 
 #' 
 #' # .text-shadow[.white[Summary tables in R]]
 #' <!--## .white[Many ways to make sense of your data]-->
 #' ### .white[Darya Vanichkina]
 #' ### .white[Sydney Informatics Hub]
 #' ### .white[`r lubridate::today()`]
 #' 
 #' 
 #' ---
 #' ## Welcome to short data science training
 #' 
 #' .pull-left[
 #' - Format: 1.5 hours (45min-1hr + 15-30 min Q&A)
 #' - Next course: November 11th: R code profiling & benchmarking 
 #'     - Useful if you've ever written a function 2 ways! Sign up: https://www.eventbrite.com.au/e/how-fast-is-your-r-code-tickets-124965712811
 #' - Link for suggestions for short courses: https://tinyurl.com/sihshort
 #' - Feedback please (at end)
 #' - TRAINING MAILING LIST: https://tinyurl.com/traininglistsih
 #' ]
 #' 
 #' .pull-right[
 #' <iframe src="https://giphy.com/embed/nnFwGHgE4Mk5W" width="480" height="333" frameBorder="0" class="giphy-embed" allowFullScreen></iframe><p><a href="https://giphy.com/gifs/happy-gif-peach-starfish-nnFwGHgE4Mk5W">via GIPHY</a></p>
 #' ]
 #' 
 #' 
 #' 
 #' 
 #' ---
 #' ## Code of Conduct
 #' - Use welcoming and inclusive language
 #' - Be respectful of different viewpoints and experiences
 #' - "Be nice", and support others' learning
 #' - Full details & how to report issues: https://sydney-informatics-hub.github.io/codeofconduct/
 #' 
 #' 
 #' 
 #' 
 #' ---
 #' ## A philosophy of tables
 #' 
 #' 
 #' ### When would you make a table?
 #' 
 #' 1. Beginning/during analysis: quick summary for sanity
 #'     - Often paired with quick data visualisation
 #' 2. For publication: pretty summary table, ideally reproducibly generated based on the actual data
 #'     - Needs to look "pretty"
 #'     - Needs to conform to journal standards
 #' 
 #' 
 #' ---
 #' ## Outline for today `r icon::fa("check-circle", pull = "right")`
 #' 
 #' 1. Creating summary tables in R (for use *within* R)
 #' 2. Creating summary tables in Rmarkdown (for publication/use *outside* of R)
 #' 3. A digression into Rmarkdown table rendering frameworks
 #' 
 #' --
 #' - Note: this will not be an exhaustive overview - we are sure there are libraries we have missed!
 #' 
 #' - Also note: these are the libraries we actually use
 #' 
 #' 
 #' 
 #' 
 #' ---
 #' ## Talk brought to you by R: 1 problem, many solutions 
 #' 
 #' .pull-left[
 ## ----mydata1----------------------------------------------------------------------------------------
 # Toy data: one numeric column holding three 1s followed by three 2s
 mydata <- data.frame(x = rep(c(1, 2), each = 3))
 summary(mydata$x)

 #' 
 #' ]
 #' 
 #' .pull-right[
 ## ---- eval=F,purl=TRUE------------------------------------------------------------------------------
 ## summary(mydata$"x")
 ## summary(mydata["x"])
 ## summary(mydata[,"x"])
 ## summary(mydata[["x"]])
 ## summary(mydata[1])
 ## summary(mydata[,1])
 ## summary(mydata[[1]])
 ## with(mydata, summary(x))
 ## summary(subset(mydata, select=x))
 ## summary(dplyr::select(mydata, x))
 ## library(magrittr)
 ## mydata %>% with(summary(x))
 ## mydata %>% summary(.$x)
 ## mydata %>% summary(.x)

 #' 
 #' 
 #' ]
 #' 
 #' 
 #' 
 #' 
 #' 
 #' ---
 #' ## Data for today: Penguins!
 #' 
 #' .pull-left[
 #' - Size measurements for 3 penguin species observed on 3 islands in the Palmer Archipelago (Biscoe, Dream, Torgersen), Antarctica over a study period of 3 years.
 #' - Data collected in 2007 - 2009 by Dr. Kristen Gorman with the Palmer Station Long Term Ecological Research Program `r Citep(myBib,"palmerpenguins:2020")`
 #' ]
 #' .pull-right[

 #' ]
 #' ---
 #' ## Penguins in real life
 #' .center[

 #' ]
 #' 
 #' ---
 #' ## Load Palmer penguins dataset
 #' 
 #' 
 #' .pull-left[
 ## ----loaddata---------------------------------------------------------------------------------------
 # load data
 # (install once if needed: install.packages("palmerpenguins"))
 library(palmerpenguins)

 #' 
 #' ]
 #' 
 #' .pull-right[
 #' <img src="assets/palmerpenguins.png" alt="Penguins" width="200"/> .right[.font40[Artwork by @allison_horst]]
 #' ]
 #' 
 #' 
 ## ----head, render=lemon_print-----------------------------------------------------------------------
 # Preview the first four rows of the penguins data
 head(penguins, n = 4)

 #' 
 #' 
 #' ---
 #' ## Basic R tables
 #' - The base R functions `table()` and `summary()` provide summary tables of the data for us. 
 #' 
 #' .pull-left[
 ## ----table1-----------------------------------------------------------------------------------------
 # Count penguins per species; NA gets its own cell only if present
 table(penguins[["species"]], useNA = "ifany")

 #' 
 ## ----cross-tab--------------------------------------------------------------------------------------
 # cross-tabulate species against sex (NA shown if any)
 table(
   species = penguins$species,
   sex = penguins$sex,
   useNA = "ifany"
 )

 #' ]
 #' 
 #' .pull-right[
 #' .scroll-box-20[
 ## ----alltable---------------------------------------------------------------------------------------
 # one frequency table per column of the data frame
 lapply(
   penguins,
   function(x) table(x, useNA = "ifany")
 )

 #' ]
 #' ]
 #' 
 #' ---
 #' ## summary() provides a table for categorical & 5-summary for numeric data
 #' 
 #' .font80[
 ## ----summary, render=lemon_print--------------------------------------------------------------------
 # or use summary(): a single call summarises every column
 summary(penguins)

 #' ]
 #' 
 #' ---
 #' ## The janitor::tabyl() function is a fancier version of table
 #' 
 #' 
 ## ----janitor, render=lemon_print--------------------------------------------------------------------
 # one-way frequency table of species
 janitor::tabyl(penguins$species)

 #' 
 ## ----janitor2way, render=lemon_print----------------------------------------------------------------
 # two-way: species in rows, sex in columns
 penguins %>%
   janitor::tabyl(species, sex)

 #' 
 #' ---
 #' ## The janitor::tabyl() function is a fancier version of table
 #' 
 #' 
 ## ----janitoradorn, render=lemon_print---------------------------------------------------------------
 # adorn_*() helpers layer totals, row percentages and counts on a tabyl
 penguins %>%
   janitor::tabyl(species, sex) %>%
   janitor::adorn_totals(where = c("row", "col")) %>%
   janitor::adorn_percentages(denominator = "row") %>%
   janitor::adorn_pct_formatting(digits = 0, rounding = "half up") %>%
   janitor::adorn_ns() %>%
   janitor::adorn_title(placement = "combined")

 #' 
 #' 
 #' ???
 #' - The benefits of `janitor::tabyl()` are easier to discern once we start leveraging the `adorn_*()` functions:
 #' - Newline
 #' 
 #' ---
 #' ## janitor::tabyl() design paradigms & limitations
 #' 
 #' .pull-left[
 #' - Idea: input/output is a data frame
 #' - Tidyverse-aligned
 #' - Adorn functions allow for customisation of output
 #' - Biggest limitation: handling of numeric data
 #' ]
 #' 
 #' .pull-right[
 #' 
 #' .scroll-box-20[
 ## ----janitorNumeric---------------------------------------------------------------------------------
 # tabyl() on a numeric column: every distinct value becomes its own row
 penguins %>%
   janitor::tabyl(bill_length_mm)

 #' ]
 #' ]
 #' 
 #' 
 #' ---
 #' ## Skimr `skimr::skim(penguins)`
 #' .font80[
 #' .scroll-box-20[
 ## ----skimr1-----------------------------------------------------------------------------------------
 # skim the whole data frame in one call
 skimr::skim(penguins)

 #' ]
 #' ]
 #' 
 #' ---
 #' ## Skimr (unrendered)
 #' .scroll-box-20[
 #' ![picture alt](assets/skimrraw1.png)
 #' 
 #' ![picture alt](assets/skimrraw2.png)
 #' 
 #' ]
 #' 
 #' 
 #' ---
 #' ## Skimr can output a tibble & works with tidy groups
 #' 
 #' .font70[
 ## ----skimistibble, render=lemon_print---------------------------------------------------------------
 # skim() output converts to a plain tibble, so dplyr verbs apply to it
 skimr::skim(penguins) %>%
   tibble::as_tibble() %>%
   dplyr::filter(skim_variable == "bill_depth_mm") %>%
   dplyr::select(
     skim_type, skim_variable,
     n_missing, numeric.mean
   )

 #' 
 #' 
 #' #### Skimr can output a cross-table
 #' 

 #' .scroll-box-20[
 ## ----crosstabskim2, echo =FALSE---------------------------------------------------------------------
 # skim each species separately by grouping first
 penguins %>%
   dplyr::group_by(species) %>%
   skimr::skim()

 #' ]
 #' ]
 #' 
 #' ---
 #' ## Skimr pros and cons
 #' .pull-left[
 #' ### Pros
 #' - Nice defaults for data summaries
 #' - Does not throw an error with vectors or matrices (coerces to df)
 #' - Works in both the console and knit output
 #' 
 #' 
 #' 
 #' 
 #' 
 #' ]
 #' 
 #' .pull-right[
 #' ### Cons
 #' - Doesn't include functionality to support models
 #' - A bit clunky to customise
 #' - Opinionated, possibly/likely differently from your journal of interest 
 #' ]
 #' 
 #' ---
 #' ## Defining a custom summary function with skimr
 #' 
 #' .pull-left[
 ## ----newskimr---------------------------------------------------------------------------------------
 # Custom skimmer built with skim_with(): two extra numeric summaries.
 # Both helpers drop missing values, so a column containing NAs still
 # gets a number (mad() defaults to na.rm = FALSE and would return NA).
 mynewskim <-
   skimr::skim_with(
     numeric = skimr::sfl(
       nmad = function(x) mad(x, na.rm = TRUE),
       var = function(x) var(x, na.rm = TRUE)
     )
   )


 #' ]
 #' 
 #' .pull-right[
 #' 
 #' .font30[
 ## ----newskimrDemo2,eval=T,echo=F--------------------------------------------------------------------
 # Run the custom skimmer defined in the previous chunk.
 # NOTE(review): group_by() is given no grouping variables here, so the
 # data presumably reaches mynewskim() ungrouped -- confirm this is intended.
 dplyr::group_by(penguins) %>%
  mynewskim()

 #' ]
 #' ]
 #' 
 #' ---
 #' ## gtsummary
 #' 
 #' .pull-left[
 #' - Uses the [gt](https://blog.rstudio.com/2020/04/08/great-looking-tables-gt-0-2/) package developed at RStudio (Grammar of tables?)
 #' - Default table for European Urology, The Journal of Urology, Urology, and the British Journal of Urology International.
 #' 
 #' ]
 #' 
 #' .pull-right[
 #' 
 #' .scroll-box-20[
 #' .font60[
 ## ----gtsummary1-------------------------------------------------------------------------------------
 # Default gtsummary table over every column
 gtsummary::tbl_summary(data = penguins)

 #' ]]
 #' ]
 #' 
 #' 
 #' 
 #' ---
 #' ## gtsummary: easy to customise output
 #' 
 #' .pull-left[
 #' .scroll-box-20[

 #' ]
 #' ]
 #' 
 #' .pull-right[
 #' .font40[
 ## ----gtsummaryCustom2, echo=F-----------------------------------------------------------------------
 # Per-species summary: median (p25-p75) for continuous columns,
 # n / N (%) for categorical ones, continuous values shown to 0 decimals.
 penguins %>%
   gtsummary::tbl_summary(
     by = species,
     statistic = list(
       gtsummary::all_continuous() ~ "{median} ({p25}-{p75})",
       gtsummary::all_categorical() ~ "{n} / {N} ({p}%)"
     ),
     digits = gtsummary::all_continuous() ~ 0,
     missing_text = "(Missing)"
   )

 #' ]
 #' ]
 #' 
 #' ---
 #' ## gtsummary: "statistics" customisation options
 #' 
 #' .pull-left[
 #' 
 #' Categorical
 #' 
 #' - n - frequency
 #' - N - denominator, or cohort size
 #' - p - formatted percentage
 #' 
 #' Missingness (both categorical & continuous)
 #' 
 #' - N_obs - total number of observations
 #' - N_miss - number of missing observations
 #' - N_nonmiss - number of non-missing observations
 #' - p_miss - percentage of observations missing
 #' - p_nonmiss - percentage of observations not missing
 #' 
 #' 
 #' 
 #' ]
 #' 
 #' .pull-right[
 #' Continuous
 #' 
 #' - median - median
 #' - mean - mean
 #' - sd - standard deviation
 #' - var - variance
 #' - min - minimum
 #' - max - maximum
 #' - p## - any integer percentile, where ## is an integer from 0 to 100
 #' - foo - any function of the form foo(x) is accepted where x is a numeric vector
 #' ]
 #' 
 #' ---
 #' ## gtsummary: add p-values & multiple testing correction 
 #' 
 #' .red[
 #' Please do not misuse this functionality: all tests have assumptions that *must* be checked!
 #' ]
 #' .pull-left[

 #' ]
 #' 
 #' .pull-right[
 #' .font40[
 ## ----gtsummaryCustom3b, echo=F----------------------------------------------------------------------
 # Summarise by species (missing = "no"), then add per-variable p-values
 # and FDR-adjusted q-values.
 gtsummary::tbl_summary(
   penguins,
   by = species,
   missing = "no"
 ) %>%
   gtsummary::add_p(
     test = list(
       # Kruskal-Wallis rank-sum test
       gtsummary::all_continuous() ~ "kruskal.test",
       # chi-squared test of independence
       gtsummary::all_categorical() ~ "chisq.test"
     )
   ) %>%
   # spell out TRUE: `T` is an ordinary variable that can be reassigned
   gtsummary::add_q(method = "fdr", quiet = TRUE)

 #' ]
 #' ]
 #' 
 #' ---
 #' ## Options for p-value calculation & MTC
 #' 
 #' .pull-left[
 #' ### p-values
 #' - "t.test" for a t-test
 #' - "aov" for a one-way ANOVA test
 #' - "wilcox.test" for a Wilcoxon rank-sum test
 #' - "kruskal.test" for a Kruskal-Wallis rank-sum test
 #' - "chisq.test" for a chi-squared test of independence
 #' - "chisq.test.no.correct" for a chi-squared test of independence without continuity correction
 #' - "fisher.test" for a Fisher's exact test
 #' 
 #' ]
 #' 
 #' .pull-right[
 #' 
 #' - "lme4" for a random intercept logistic regression model to account for clustered data, lme4::glmer(by ~ variable + (1 | group), family = binomial). The by argument must be binary for this option.
 #' 
 #' ### q-values
 #' - From `stats::p.adjust`
 #' - "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr"
 #' 
 #' - *Which test? See SIH's Research Essentials & Linear Models 1/2 training or reach out to our [Statistics Consulting](https://www.sydney.edu.au/research/facilities/sydney-informatics-hub/project-support/request.html) service*
 #' ]
 #' 
 #' ---
 #' ## Finalfit
 #' .pull-left[

 #' 
 #' ]
 #' 
 #' .pull-right[
 #' .font60[
 ## ----finalfit1b,echo=F, render=lemon_print----------------------------------------------------------
 # Summary table: species as the dependent variable,
 # every other column as an explanatory variable
 finalfit::summary_factorlist(
   penguins,
   dependent = "species",
   explanatory = setdiff(names(penguins), "species")
 )

 #' ]
 #' ]
 #' 
 #' ---
 #' ## Finalfit also supports statistical testing and MTC
 #' 
 #' .pull-left[

 #' 
 #' ]
 #' 
 #' .pull-right[
 #' .font60[
 ## ----finalfitmodel1b, echo=F------------------------------------------------------------------------
 # Same species-vs-everything summary, now with a p-value column
 # (chi-squared for categorical, aov for continuous), rendered via gt
 gt::gt(
   finalfit::summary_factorlist(
     penguins,
     dependent = "species",
     explanatory = setdiff(names(penguins), "species"),
     p = TRUE
   )
 )

 #' ]]
 #' 
 #' ---
 #' ## Different ways of specifying non-parametric tests
 #' 
 #' .pull-left[

 #' 
 #' ]
 #' 
 #' .pull-right[
 #' .font60[
 ## ----finalfitmodel2b, echo=F------------------------------------------------------------------------
 # Species-vs-everything summary with p-values, rendered via gt.
 # cont = "median" reports continuous variables as medians; per the
 # finalfit notes on the following slide this also switches them to a
 # non-parametric test. Categorical variables: chi-squared.
 gt::gt(
   finalfit::summary_factorlist(
     penguins,
     dependent = "species",
     explanatory = setdiff(names(penguins), "species"),
     p = TRUE,
     cont = "median"
   )
 )

 #' ]]
 #' 
 #' ---
 #' ## Finalfit: summary statistics & tests
 #' 
 #' .pull-left[
 #' - For continuous explanatory variables: "mean" (standard deviation) or "median" (interquartile range). If "median" then non-parametric hypothesis test performed.
 #' - Can also set non-parametric for only a few options with `cont_nonpara`
 #' - Can set which quartiles to show with `cont_range`
 #' - Continuous variable test: analysis of variance,  Welch two sample t-test or kruskal.test (nonparam)
 #' 
 #' ]
 #' 
 #' .pull-right[
 #' - For continuous dependent and continuous explanatory, the parametric test p-value returned is for the Pearson correlation coefficient. The non-parametric equivalent is the p-value for the Spearman correlation coefficient.
 #' - Categorical:  either "chisq" or "fisher".
 #' ]
 #' 
 #' ???
 #' - Extra notes on continuous
 #' - Continuous variable parametric test: "aov" (analysis of variance) or "t.test" for Welch two sample t-test. Non-parametric test is always Kruskal Wallis (kruskal.test) which in two-group setting is equivalent to Mann-Whitney U /Wilcoxon rank sum test.
 #' 
 #' ---
 #' ## Labelled data 
 #' 
 #' 
 #' 
 #' .pull-left[
 #' - Use a package like `labelled` to set a human-readable label for each variable
 #' (other packages like `sjlabelled` should be compatible with these labels,
 #' they all do more or less the same thing).
 #' 
 #' - The labels might not be that different to the actual variable names,
 #' but it's nice to be able to do things like having units on the end in brackets.
 #' 
 #' - Labels sometimes get lost after filtering/recalculating the data,
 #' so you may have to set variable labels again closer to where you're
 #' going to use them.
 #' ]
 #' 
 #' .pull-right[
 #' .font60[
 ## ----labeldata--------------------------------------------------------------------------------------
 # Attach a human-readable label to every column,
 # then look at the labels of columns 4 and 5
 penguins2 <- labelled::set_variable_labels(
   penguins,
   species = "Species",
   island = "Island",
   bill_length_mm = "Bill length (mm)",
   bill_depth_mm = "Bill depth (mm)",
   flipper_length_mm = "Flipper length (mm)",
   body_mass_g = "Body mass (g)",
   sex = "Sex",
   year = "Year"
 )
 labelled::var_label(penguins2)[4:5]

 #' ]
 #' ]
 #' 
 #' ---
 #' ## Labelled data - both finalfit & gtsummary work well
 #' 
 #' .pull-left[
 #' .font30[
 ## ----gtsummary_label--------------------------------------------------------------------------------
 # Summarise the labelled copy of the data, split by species
 penguins2 %>%
   gtsummary::tbl_summary(by = "species")

 #' ]
 #' 
 #' ]
 #' 
 #' .pull-right[
 #' .font40[
 ## ----finalfit_label, render=lemon_print-------------------------------------------------------------
 # finalfit summary on the labelled data. The explanatory columns are
 # taken from penguins2 itself (not from penguins), so the data and the
 # column list cannot drift apart.
 finalfit::summary_factorlist(
   penguins2,
   dependent = "species",
   explanatory =
     setdiff(names(penguins2), "species")
 )

 #' ]]
 #' 
 #' ---
 #' ## Let's build a basic model to predict species
 #' 
 #' We expect your use case to be more like predicting disease, treatment or other binary outcome.
 #' 
 #' .pull-left[
 ## ----MakeDataset,echo=F-----------------------------------------------------------------------------
 # Two-species subset: remove Gentoo rows, then drop the unused factor
 # level so species has exactly the levels that remain
 penguins_filt <- dplyr::mutate(
   dplyr::filter(penguins, species != "Gentoo"),
   species = droplevels(species)
 )


 #' ]
 #' 
 #' .pull-right[
 ## ----Visualise2, echo=F,warning=F, fig.width=6,fig.height=5,dpi=200---------------------------------
 # Scatter of bill length against flipper length, coloured by species
 ggplot2::ggplot(
   penguins_filt,
   ggplot2::aes(
     x = flipper_length_mm,
     y = bill_length_mm,
     colour = species
   )
 ) +
   ggplot2::geom_point() +
   ggplot2::theme_minimal() +
   # must come after theme_minimal(), which would otherwise reset it
   ggplot2::theme(legend.position = "bottom")

 #' ]
 #' 
 #' ???
 #' We are using a logistic regression model here as an example because very often in the biomedical literature we are trying to predict a binary outcome (healthy vs diseased, control vs treatment etc). It makes no sense in the context of our data (and our data, frustratingly for teaching purposes, is perfectly separable by species).
 #' 
 #' ---
 #' ## Model summaries with gtsummary: multivariable
 #' 
 #' .pull-left[
 ## ----buildmodel,message=F,warning=F, render=lemon_print---------------------------------------------
 # Logistic regression of species on two measurements
 # (penguins_filt holds only Adelie & Chinstrap)
 m1 <- glm(
   species ~ flipper_length_mm + bill_length_mm,
   family = binomial,
   data = penguins_filt
 )
 # Hand-built table: exponentiated coefficients (odds ratios) with
 # their confidence limits, plus the p-value column of the summary
 as.data.frame(
   cbind(
     exp(cbind(OR = coef(m1), confint(m1))),
     pval = coef(summary(m1))[, "Pr(>|z|)"]
   )
 )

 #' 
 #' ]
 #' 
 #' .pull-right[
 ## ----gtsumModel,warning=F---------------------------------------------------------------------------
 # Same model as a gtsummary table, coefficients exponentiated
 m1 %>%
   gtsummary::tbl_regression(exponentiate = TRUE)

 #' 
 #' ]
 #' 
 #' ---
 #' ## Model summaries with gtsummary: univariable
 #' 
 #' .pull-left[

 #' ]
 #' 
 #' .pull-right[
 #' .font60[
 ## ----modelunivGTs2, eval=T,echo=F,warning=F,message=F-----------------------------------------------
 # One single-predictor binomial glm per remaining column, with species
 # as the outcome; estimates are reported exponentiated and p-values
 # are formatted by style_pvalue(digits = 2).
 # NOTE(review): the modifier calls below look order-dependent (add_q()
 # presumably needs the p-values added before it, and bold_p(q = TRUE)
 # needs add_q()) -- keep the sequence as is.
 penguins_filt %>%
  dplyr::select(species, 
                bill_length_mm, 
                flipper_length_mm) %>%
  gtsummary::tbl_uvregression(
    method = glm,
    y = species,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    pvalue_fun = 
      ~gtsummary::style_pvalue(.x, digits = 2)
  ) %>%
  gtsummary::add_global_p() %>%  # add global p-value 
  gtsummary::add_nevent() %>%    # add number of events of the outcome
  gtsummary::add_q() %>%         # adjusts global p-values for multiple testing
  gtsummary::bold_p() %>%        # bold p-values under a given threshold (default 0.05)
  gtsummary::bold_p(t = 0.10, q = TRUE) %>% # now bold q-values under the threshold of 0.10
  gtsummary::bold_labels()

 #' ]]
 #' 
 #' ---
 #' ## Model summaries with finalfit: simultaneous
 #' 
 ## ----buildmodelff,message=F,warning=F, render=lemon_print-------------------------------------------
 # finalfit() call with species as the dependent variable and the two
 # measurements as explanatory variables; keep the result for later export
 ffmodeldemo <- finalfit::finalfit(
   penguins_filt,
   dependent = "species",
   explanatory = c("flipper_length_mm", "bill_length_mm")
 )
 ffmodeldemo

 #' 
 #' ---
 #' ## Finalfit also provides odds ratio/hazard ratio plots
 #' 
 #' .pull-left[

 #' ]
 #' 
 #' .pull-right[
 ## ----plotdemo2,warning=F,message=F,echo=F-----------------------------------------------------------
 # Odds-ratio plot for the same dependent/explanatory variables
 finalfit::or_plot(
   penguins_filt,
   dependent = "species",
   explanatory = c("flipper_length_mm", "bill_length_mm")
 )

 #' ]
 #' 
 #' ---
 #' ## Which modeling packages do finalfit & gtsummary support?
 #' 
 #' .pull-left[
 #' #### GTsummary
 #' - `stats::lm()`
 #' - `stats::glm()`
 #' - `survival::coxph()`
 #' - `survival::clogit()`
 #' - `survival::survreg()`
 #' - `lme4::glmer()`
 #' - `lme4::lmer()`
 #' - `geepack::geeglm()`
 #' - [Broom::tidy()-see here](http://www.danieldsjoberg.com/gtsummary/reference/vetted_models.html)
 #' ]
 #' 
 #' .pull-right[
 #' #### Finalfit
 #' - `stats::lm()`
 #' - `stats::glm()`
 #' - `survival::coxph()`
 #' - `lme4::glmer()`
 #' - `lme4::lmer()`
 #' - `survey::svyglm()`
 #' - `cmprsk::crr()`
 #' -  *Bayesian logistic regression: Our own particular rstan models are supported and will be documented in the future.* [Undocumented by authors...]
 #' ]
 #' 
 #' 
 #' 
 #' ---
 #' ## Exporting to "useful" formats: html, Word, PDF
 #' 
 #' .pull-left[
 #' - Two use cases: within R & outside of it
 #' - We usually want to export our "pretty" summary tables to html, .docx (Word) and/or PDF
 #' - R has several libraries and engines that enable such export: most things work well with HTML, but this is rarely the format of interest
 #' - Once again, this is not an exhaustive list!
 #' - NB: huxtable uses flextable to produce Word output
 #' - NB2: export to PDF requires LaTeX
 #' 
 #' ]
 #' 
 #' .pull-right[
 #' ![GTSummaryOutputOptions](assets/gtsummarysnapshot.png)
 #' .right[<sup>[Table source](http://www.danieldsjoberg.com/gtsummary/articles/rmarkdown.html) </sup>]]
 #' 
 #' 
 #' ---
 #' ## Exporting to Word: gtsummary most straightforward
 #' 
 #' .pull-left[
 ## ----setupTables, echo = F--------------------------------------------------------------------------
 # Build one summary object per package for the export examples
 skimsum <- skimr::skim(penguins)
 gtsum <- gtsummary::tbl_summary(penguins)
 finalfitsum <- finalfit::summary_factorlist(
   penguins,
   dependent = "species",
   explanatory = setdiff(names(penguins), "species")
 )

 ## ----tablesflext,eval=F-----------------------------------------------------------------------------
 ## gtsum %>%
 ##   gtsummary::as_flex_table() %>%
 ##   # heaps of formatting options can
 ##   # be added here
 ##   flextable::save_as_docx(
 ##     path="gtsummarydemo.docx")

 #' ]
 #' 
 #' .pull-right[
 #' 
 #' ![picture alt](assets/gtsummarywordphoto.png)
 #' 
 #' ]
 #' 
 #' ---
 #' ## Finalfit to Word
 #' 
 #' .pull-left[
 ## ----FF2Word, echo=T,eval=F-------------------------------------------------------------------------
 ## save(finalfitsum,ffmodeldemo,
 ##      file = "finalfitsum.rda")
 ## rmarkdown::render("FinalFit2WordDemo.Rmd")

 #' 
 #' - Make sure to check [this page](https://rmarkdown.rstudio.com/articles_docx.html) for working with Word Styles with Rmarkdown and beyond
 #' 
 #' ]
 #' 
 #' .pull-right[
 #' .scroll-box-20[
 ## ----comment='', echo=F-----------------------------------------------------------------------------
 # Print the demo Rmd verbatim, one source line per output line
 cat(
   readLines("FinalFit2WordDemo.Rmd"),
   sep = "\n"
 )

 #' ]]
 #' 
 #' 
 #' ---
 #' 
 #' ## Finalfit to Word: output
 #' 
 #' 
 #' .center[

 #' ]
 #' 
 #' 
 #' 
 #' ---
 #' ## Exporting to PDF: gtsummary & finalfit
 #' 
 #' .pull-left[
 #' - Requires LaTeX
 #' - For finalfit, as with Word export, this works best with a custom Rmd
 #' 
 ## ----gtsummaryQ, eval=F, warning=F,message=F--------------------------------------------------------
 ## gtsum %>%
 ##   gtsummary::as_kable_extra() %>%
 ##   kableExtra::kable_classic_2(
 ##     full_width = T) %>%
 ##   kableExtra::save_kable("gtsum.pdf")

 #' ]
 #' 
 #' .pull-right[
 #' - Can also generate raw LaTeX
 #' - If you require PDF output, it may be more prudent to write the entire report and analysis in RMarkdown and render the whole thing to PDF
 #' 
 #' 
 #' ]
 #' 
 #' 
 #' 
 #' 
 #' 
 #' ---
 #' ## Summary by downloads
 #' 
 #' .pull-left[
 #' ### Skimr
 #' - Week: ![Week](https://cranlogs.r-pkg.org/badges/last-week/skimr)
 #' - Month: ![Month](https://cranlogs.r-pkg.org/badges/skimr)
 #' - Grand total: ![Grand total](https://cranlogs.r-pkg.org/badges/grand-total/skimr)
 #' 
 #' ### Janitor
 #' - Week: ![Week](https://cranlogs.r-pkg.org/badges/last-week/janitor)
 #' - Month: ![Month](https://cranlogs.r-pkg.org/badges/janitor)
 #' - Grand total: ![Grand total](https://cranlogs.r-pkg.org/badges/grand-total/janitor)
 #' 
 #' ]
 #' 
 #' 
 #' .pull-right[
 #' ### Finalfit
 #' - Week: ![Week](https://cranlogs.r-pkg.org/badges/last-week/finalfit)
 #' - Month: ![Month](https://cranlogs.r-pkg.org/badges/finalfit)
 #' - Grand total: ![Grand total](https://cranlogs.r-pkg.org/badges/grand-total/finalfit)
 #' 
 #' 
 #' 
 #' ### Gtsummary
 #' - Week: ![Week](https://cranlogs.r-pkg.org/badges/last-week/gtsummary)
 #' - Month: ![Month](https://cranlogs.r-pkg.org/badges/gtsummary)
 #' - Grand total: ![Grand total](https://cranlogs.r-pkg.org/badges/grand-total/gtsummary)
 #' 
 #' 
 #' 
 #' ]
 #' 
 #' 
 #' ---
 #' ## What table package should I choose - console?
 #' 
 #' 
 #' .pull-left[
 #' - No dependencies: `table()` & `summary()`
 #' - Doing data cleaning & quick: `janitor::tabyl()`
 #' ]
 #' 
 #' .pull-right[
 #' - Want something fancier? Have the screen real estate? `skimr::skim()`
 #' ]
 #' 
 #' ---
 #' ## What table package should I choose - publication?
 #' ### Finalfit
 #' 
 #' 
 #' #### Pros
 #' - "Looks" prettier straight out of the box, especially for generic summary tables
 #' - More robust interface for models
 #' 
 #' #### Cons
 #' - "Harder" to specify custom p-value and q-value functions
 #' - Less flexibility for output summary statistics than `gtsummary()`
 #' - Undocumented functionality?!?!?
 #' 
 #' 
 #' ---
 #' ## What table package should I choose - publication?
 #' ### gtsummary
 #' 
 #' #### Pros
 #' - Easy to export straight to Word/PDF on the fly
 #' - Can be customised quite extensively with `flextable::` and `huxtable::` libraries to generate attractive output
 #' - Works with the RStudio-developed `gt::` package, so support & development are highly likely
 #' - Good documentation that makes finding supported models easy
 #' 
 #' #### Cons
 #' - Requires more code to customise model comparisons
 #' 
 #' 
 #' ---
 #' ## Other packages
 #' - `arsenal::` - meant to reproduce SAS macros %table and %summary
 #' - [More packages in an overview from 2018](https://dabblingwithdata.wordpress.com/2018/01/02/my-favourite-r-package-for-summarising-data/): `Hmisc::describe()`, `pastecs::stat.desc()`, `psych::describe()`, `summarytools::descr()`
 #' - [More packages in a github gist from 2020](https://gist.github.com/davebraze/f73cc377fdee3b0b0373f0abc5a9725b): `tableone::CreateTableOne()`, `table1::table1()`, `Gmisc::mergeDesc()`, `tangram`, `modelsummary::modelsummary()` (uses broom!), `sjPlot::tab_model()`
 #' - [RStudio table contest](https://community.rstudio.com/tag/table-contest)
 #' 
 #' 
 #' ---
 #' ## Questions, post-training survey & suggestions
 #' 
 #' 
 #' 
 #' .pull-left[
 #' 
 #' - Please fill out the survey in the chat (Choose "Data Science Short Training - Tables") 
 #' - https://tinyurl.com/sihsurvey
 #' - If you have suggestions for new courses (or would like to see & upvote what others have suggested) please see here: https://tinyurl.com/sihshort
 #' - (Reminder) Suggest longer-form training: https://tinyurl.com/suggesttraining
 #' - TRAINING MAILING LIST: https://tinyurl.com/traininglistsih
 #' ]
 #' 
 #' .pull-right[
 #' <iframe src="https://giphy.com/embed/26BRqPg05olzXG1bi" width="480" height="428" frameBorder="0" class="giphy-embed" allowFullScreen></iframe><p><a href="https://giphy.com/gifs/studiosoriginals-26BRqPg05olzXG1bi">via GIPHY</a></p>
 #' ]
 #' 
 #' 
 #' ---
 #' ## References
 #' 

 #' 
 #' - Marius Mather: code/content on labelled data rendering with `gtsummary` and `finalfit`
 #' 
 #'