calpolystat · February 22, 2018 05:58
diff --git a/#Hothand.txt b/#Hothand.txt
 Hot Hand Phenomenon Shiny App
 
 Base R code created by Kevin Ross
 Shiny app files created by Kevin Ross

 Cal Poly Statistics Dept Shiny Series 
 http://statistics.calpoly.edu/shiny
diff --git a/app.R b/app.R
 # Shiny app to accompany
 # Ross, K.J. (2016) "Classroom Investigations of Recent Research
 # Concerning the Hot Hand Phenomenon". Working paper, Oct 2016.

 if (!require("shiny")) install.packages("shiny")
 if (!require("shinyBS")) install.packages("shinyBS")
 if (!require("plyr")) install.packages("plyr")
 if (!require("ggplot2")) install.packages("ggplot2")

 library(shiny)
 library(shinyBS)
 library(plyr)
 library(ggplot2)

 # function that returns basic streak statistics given a sequence
 source("streak_stats.r")

 # List of descriptions of streak statistics
 streak_stat_options = as.list(c("Proportion of S after streaks of S" = "phat_after_Sstreak",
                                "Difference in proportion of S (after streaks of S - other trials)" = "phat_Sstreak_vs_others",
                                "Difference in proportion of S (after streaks of S - after streaks of F)" = "phat_Sstreak_vs_Fstreak",
                                "Frequency of S streaks" = "Sstreak_frequency",
                                "Longest run of S" = "longest_run_success",
                                "Total number of runs" = "n_runs"
                                # "Number of runs of S" = "n_runs_success",
                                # "Proportion of S after streaks of F" = "phat_after_Fstreak",
                                # "Number of runs of F" = "n_runs_failure",
                                # "Longest run of F" = "longest_run_failure"
 ))

 # global constants
 max_n_repetitions = 20000  # maximum number of repetitions
 max_n_dots = 113  # maximum number of dots to show before histogram
 max_streak_length = 7  # maximum alloable streak length
 observed_color = "#ffa500"  # orange
 pvalue_color = "#00bfff"   # skyblue
 `%then%` <- shiny:::`%OR%` # for Shiny validate (see validate help)


 ui <- fluidPage(
  theme = "bootswatch-cerulean.css",
  titlePanel("Randomization-based Analysis of the Hot Hand Phenomenon"),
  sidebarLayout(
    sidebarPanel(
      selectInput(inputId = "statistic",
                  label = "Choose a streak statistic:",
                  choices = streak_stat_options,
                  selected = "phat_after_Sstreak"
      ),
      numericInput(inputId = "streak_length",
                   label = "Define streak length:",
                   value = 1,
                   min = 1, max = max_streak_length, step = 1),
      radioButtons(inputId = "input_type",
                   label = "Select method to input observed data:",
                   choices = c("Input results sequence"= "input_data",
                               "Input summary statistics"="input_stat"),
                   selected = "input_data"),
      conditionalPanel(
        condition = "input.input_type == 'input_stat'",
        textInput(inputId = "n_trials",
                  label = "Number of trials:",
                  value = ""),
        textInput(inputId = "n_success",
                  label = "Total number of successes:", value = ""),
        textInput(inputId = "observed_stat",
                  label = "Observed value of streak statistic:",
                  value = "")
      ),
      conditionalPanel(
        condition = "input.input_type == 'input_data'",
        textInput(inputId = "observed_data",
                  label = "Enter observed results sequence",
                  value = "")
      ),
      actionButton(inputId = "run",
                   label="Accept inputs and initialize simulation"),
      tags$hr(style = "border-color: #000000;"),
      numericInput(inputId = "n_repetitions",
                   label = "Number of shuffles to simulate:",
                   value = 1,
                   min = 1, max = max_n_repetitions),
      actionButton(inputId = "more_repetitions",
                   label="Shuffle"),
      checkboxInput(inputId = "compute_pvalue",
                    label = "Compute p-value",
                    value = FALSE),
      checkboxInput(inputId = "recent_shuffle",
                    label = "Show most recent shuffle",
                    value = FALSE),
      actionButton(inputId = "reset",
                   label = "Clear plot"),
      div("Shiny app by",
          a(href="mailto:[email protected]",target="_blank",
            "Kevin Ross"),align="right", style = "font-size: 8pt"),
      div("Base R code by",
          a(href="mailto:[email protected]",target="_blank",
            "Kevin Ross"),align="right", style = "font-size: 8pt"),
      div("Shiny source files:",
          a(href="https://gist.github.com/calpolystat/d40a02fa87508ac5ac4b",
            target="_blank","GitHub Gist"), 
          align="right", style = "font-size: 8pt"),
      div(a(href="http://www.statistics.calpoly.edu/shiny",target="_blank",
            "Cal Poly Statistics Dept Shiny Series"),
          align="right", style = "font-size: 8pt")
    ),
    mainPanel(
      tabsetPanel(tabPanel(title = "Simulation app",
                           tags$b(tags$h4("Observed data",
                                          style = paste("color: ",
                                                        observed_color, ";"))),
                           tableOutput(outputId = "summary_stats"),
                           # bsAlert(anchorId = "alert_input_type"),
                           # bsAlert(anchorId = "alert_input_data"),
                           # bsAlert(anchorId = "alert_input_streak"),
                           bsAlert(anchorId = "alert_update_stat"),
                           tags$hr(style = "border-color: #000000;"),
                           bsAlert(anchorId = "alert_nrep"),
                           # bsAlert(anchorId = "alert_click_button"),
                           tags$b(tags$h4(textOutput(outputId = "plot_title"),
                                          style = "text-align: center;")),
                           textOutput(outputId = "recent_shuffle"),
                           textOutput(outputId = "recent_stat"),
                           plotOutput(outputId = "null_plot"),
                           tags$b(tags$h4(textOutput(outputId = "observed_stat"),
                                          style = paste("color: ",
                                                        observed_color, ";"))),
                           tags$b(tags$h4(textOutput(outputId = "pvalue"),
                                          style = paste("color: ",
                                                        pvalue_color, ";")))
      ),
      tabPanel(title = "Instructions and notes",
               includeHTML("instructions.html")),
      tabPanel(title = "References",
               includeHTML("references.html")),
      id = "current_tab",
      selected = "Simulation app"
      )
    )
  )
 )

 server <- function(input, output, session){
  
  ##### Validate inputs
  
  # choose statistic - just read input
  streak_stat_name = reactive({
    return(names(streak_stat_options)[which(streak_stat_options == input$statistic)])
  })
  
  observeEvent(c(input$statistic, input$streak_length), {
    if (input$input_type == 'input_stat' && input$observed_stat != ""){
      createAlert(session, anchorId = "alert_update_stat",
                  content = "Be sure to update the value of the observed statistic.")
    }
  })
  
  # length of streak - check integer between 1 and 7
  streak_length = reactive({
    validate(
      need(try(input$streak_length%%1 == 0),
           "Streak length must be a whole number")%then%
        need(try(input$streak_length >= 1),
             "Streak length must be at least 1")%then%
        need(try(input$streak_length <= max_streak_length),
             paste("Streak length cannot be more than ",max_streak_length))
    )
    return(input$streak_length)
  })
  
  # input data method - check that sequence is valid (just 1s and 0s)
  observed_results = reactive({
    if (input$input_type == 'input_data'){
      x = unlist(strsplit(input$observed_data, split=","))
      validate(
        need(setequal(x, c("0","1")),
             "Please enter observed results (1 for success, 0 for failure)
                  in sequence separated by commas
             (e.g. 0,1,1,1,0,0,1,0,1,1)")
      )
      return(as.numeric(x))
    }
  })
  
  # number of trials - check positive integer greater than streak length
  n_trials = reactive({
    if (input$input_type == 'input_stat'){
      validate(
        need(input$n_trials != "",
             "Please input number of trials")%then%
          need(try(as.numeric(input$n_trials) > streak_length()),
               "Number of trials must be greater than streak length")%then%
          need(try(as.numeric(input$n_trials)%%1 == 0),
               "Number of trials must be a whole number")
      )
      return(as.numeric(input$n_trials))
    }else{
      return(length(observed_results()))
    }
  })
  
  # number of successes - check positive integer less than number of trials
  n_success = reactive({
    if (input$input_type == 'input_stat'){
      validate(
        need(input$n_success != "",
             "Please input observed number of successes")%then%
          need(try(as.numeric(input$n_success) < n_trials()),
               "Number of successes must be less than number of trials")%then%
          need(try(as.numeric(input$n_success) >= streak_length()),
               "Number of successes must be greater than streak length")%then%
          need(try(as.numeric(input$n_success)%%1 == 0),
               "Number of successes must be a whole number")
      )
      return(as.numeric(input$n_success))
    }else{
      return(sum(observed_results()))
    }
  })
  
  
  #### Calculations
  
  # input data method - compute observed stats
  observed_streak_stats <- reactive({
    if (input$input_type == 'input_data'){
      return(streak_stats(observed_results(), streak_length()))
    }
  })
  
  # compute observed value of selected streak stat
  observed_stat <- reactive({
    if (input$input_type == 'input_data'){
      x = observed_streak_stats()[[input$statistic]]
    }else{
      x = as.numeric(input$observed_stat)
    }
    return(x)
  })
  
  # invalidate the simulation when any of the inputs change
  is_sim_valid <- reactiveValues(yes = TRUE)
  
  observeEvent(input$input_type, {
    is_sim_valid$yes = FALSE
    createAlert(session, anchorId = "alert_input_type",
                content = "Enter data and click Accept Inputs.")
  })
  
  observeEvent(c(input$n_trials, input$n_success, input$observed_data), {
    is_sim_valid$yes = FALSE
    createAlert(session, anchorId = "alert_input_data",
                content = "Observed data has changed.  Click Accept Inputs.")
  })
  
  observeEvent(input$streak_length, {
    is_sim_valid$yes = FALSE
    createAlert(session, anchorId = "alert_input_streak",
                content = "Streak length has changed.  Click Accept Inputs.")
  })
  
  # Simulate random permutations
  # Given # of trials and # of successes
  random_permutations = eventReactive(input$run, {
    is_sim_valid$yes = TRUE
    x = c(rep(1, n_success()), rep(0, n_trials() - n_success()))
    y = matrix(
      replicate(max_n_repetitions,
                sample(x, size = n_trials(), replace = F)),
      byrow=TRUE, nrow=max_n_repetitions)
    return(y)
  })
  
  # compute streak statistics for the random permutations
  null_dist = eventReactive(input$run, {
    withProgress(message = "Initializing simulation, please wait.",
                 detail = "When this message disappears you can use the simulate shuffles button.",
                 {
                   x = as.data.frame(
                     adply(random_permutations(), 
                           1, streak_stats, streak_length = streak_length()))
                 })
    return(x)
  })
  
  # update counter for producing the plot
  total_nrep <- reactiveValues(current = 0)
  observeEvent(input$more_repetitions, {
    if (is_sim_valid$yes == FALSE){
      createAlert(session, anchorId = "alert_click_button",
                  content = "You must click the Accept Inputs
                  button before simulating values.")
      total_nrep$current = 0
    }else{
      total_nrep$current = isolate(input$n_repetitions) + 
        isolate(total_nrep$current)
      if (total_nrep$current > max_n_repetitions){
        total_nrep$current = max_n_repetitions
        createAlert(session, anchorId = "alert_nrep",
                    content = paste("Note: ", max_n_repetitions,
                                    "is the maximum number of repetitions
                                  the app will run"))
      }
    }
  })
  
  # Reset the simulation when inputs change
  observe({
    input$run
    input$reset
    input$streak_length
    input$n_trials
    input$n_success
    input$observed_data
    total_nrep$current = 0
  })
  
  # all simulated values of the current statistic
  all_values <- reactive({
    y = null_dist()[[input$statistic]]
    return(y)
  })
  
  # values to plot; plot is updated incrementally
  plot_values <- reactive({
    if (total_nrep$current > 0){
      y = all_values()[1:total_nrep$current]
      return(y[!is.na(y)])
    }
  })
  
  # compute p-value (and numerator and denominator)
  pvalue <- reactive({
    if (input$statistic == "n_runs"){
      x = sum(plot_values() <= observed_stat())
    }else{
      x = sum(plot_values() >= observed_stat())
    }
    return(c("nrep" = length(plot_values()),
             "count" = x,
             "approx" = round(x / length(plot_values()), 4)))
  })
  
  #### Outputs
  
  # Table of summary stats
  output$summary_stats <- renderTable({
    x = data.frame(
      c("Number of trials", "Number of successes", streak_stat_name()),
      c(n_trials(), n_success(), observed_stat())
    )
    colnames(x) = c("Statistic", "Observed value")
    return(x)
  },
  include.rownames = FALSE, display = c("d","s","f") #, digits = 2
  )
  
  # Title of the plot of the null distribution (and pvalue calc)
  output$plot_title <- renderText({
    if (total_nrep$current>0){
      paste("Null distribution of streak statistic: ",
            streak_stat_name())}
  })
  
  # Most recent shuffle
  output$recent_shuffle <- renderText({
    if (input$recent_shuffle && total_nrep$current>0){
      paste("Most recent shuffle: ",
            toString(random_permutations()[total_nrep$current, ]))
    }
  })
  
  # Most recent value of streak statistic
  output$recent_stat <- renderText({
    if (input$recent_shuffle && total_nrep$current>0){
      if (!is.na(all_values()[total_nrep$current])){
        paste("Value of streak statistic for this shuffle: ",
              round(all_values()[total_nrep$current],4))
      }else{
        paste("Value of streak statistic cannot be computed for this shuffle\n
              since there are no streaks of length", streak_length())
      }
    }
  })
  
  # The plot of the null distribution and p-value
  output$null_plot <- renderPlot({
    x_axis_limits = range(c(all_values(),observed_stat()), na.rm=TRUE)*c(0.95,1.05)  ### Need c(.95, 1.05)???
    if (total_nrep$current>0){
      max_y_dots = all_values()[!is.na(all_values())]
      max_y_dots = max_y_dots[1:max_n_dots]
      if (input$statistic == "n_runs"){
        color_value = plot_values() <= observed_stat()
      }else{
        color_value = plot_values() >= observed_stat()
      }
      plot_data = data.frame(plot_value = plot_values(), color_value)
      null_plot = ggplot(data=plot_data, aes(x=plot_value, fill=color_value))
      if (pvalue()[["nrep"]] < max_n_dots){
        null_plot = null_plot +
          geom_dotplot(dotsize = .4,
                       na.rm = TRUE,
                       binwidth = (x_axis_limits[2]-x_axis_limits[1])/30) +
          scale_y_continuous(expand = c(0.01, 0.01),limits=c(0,max_n_dots/2), name="", breaks = NULL)
      }else{
        null_plot = null_plot +
          geom_histogram(colour="black",
                         binwidth = (x_axis_limits[2]-x_axis_limits[1])/50) +
          scale_y_continuous(expand = c(0.01, 0.01))
      }
      if (input$compute_pvalue && !is.na(pvalue()[["approx"]]) && !is.na(observed_stat())){
        if (input$statistic == "n_runs"){
          shade_min = -Inf
          shade_max = observed_stat()
        }else{
          shade_min = observed_stat() 
          shade_max = Inf
        }
        pvalue_fill_scale <- scale_fill_manual(
          name = "pvalue_color", 
          values = as.character(c("FALSE"="white", "TRUE"=pvalue_color)),
          limits = c("FALSE","TRUE")
        )
        null_plot = null_plot +
          pvalue_fill_scale +
          annotate("rect", xmin = shade_min, xmax = shade_max, ymin = -Inf, ymax = 0, fill=pvalue_color, color=pvalue_color) + 
          geom_vline(xintercept = observed_stat(), color = observed_color, linetype="dashed", size = 1.5)
      }else{
        null_plot = null_plot + scale_fill_manual(values=c("white","white"))
      }
      null_plot = null_plot +
        annotate("text",  x=Inf, y = Inf,
                 label = paste("Median = ", round(median(plot_values()),3),
                               "\n Mean = ", round(mean(plot_values()),3),
                               "\n SD = ", round(sd(plot_values()),3)),
                 vjust=1, hjust=1) +
        coord_cartesian(xlim=c(x_axis_limits)) + 
        ggtitle(paste("Based on ",length(plot_values())," simulated values
                      \n (resulting from  ", total_nrep$current," repetitions of the simulation)")) +
        theme(plot.title = element_text(hjust = 0.5)) +
        labs(x="Streak statistic",y="Frequency") +
        theme(legend.position = "none",
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              panel.background = element_blank(),
              axis.line.x = element_line(color="black", size = 0.5),
              axis.line.y = element_line(color="black", size = 0.5),
              plot.title = element_text(face="bold", size=14),
              axis.title = element_text(face="bold", size=14))
      print(null_plot)
    }
  })
  
  # Output value of observed statistic for plot
  output$observed_stat <- renderText({
    if (input$compute_pvalue){ 
      if (is.na(observed_stat()) & input$input_type == 'input_data'){
        paste("Warning: observed value of the streak statistic cannot be computed
              because there are no trials following a streak of length ",
              streak_length(), " in the observed data")
      }else if (is.na(observed_stat()) & input$input_type == 'input_stat'){
        paste("Please enter the observed value of the streak statistic")
      }else{ ####
        paste("Observed value of streak statistic = ", round(observed_stat(),4))
      }
    }
  })
  
  # Output the p-value calculation
  output$pvalue <- renderText({
    if (input$compute_pvalue){ 
      if (is.na(observed_stat())){
        paste("The p-value cannot be computed because the observed statistic cannot be computed")
      }else if (pvalue()[["nrep"]] == 0){
        paste("There must be at least one simulated value of the streak statistic in order to compute an approximate p-value")
      }else{
        paste("Simulated p-value = ", pvalue()[["count"]], "/",
              pvalue()[["nrep"]], " = ",
              pvalue()[["approx"]])
      }
    }
  })
 }

 shinyApp(ui = ui, server = server)
diff --git a/DESCRIPTION b/DESCRIPTION
 Title: Hot Hand Phenomenon
 Author: Kevin Ross
 AuthorUrl: https://statistics.calpoly.edu
 License: MIT
 DisplayMode: Normal
 Tags: Simulation-based inference, Hot hand in basketball, Hot hand fallacy
 Type: Shiny
diff --git a/LICENSE b/LICENSE
 The MIT License (MIT)
 
 Copyright (c) 2017 Kevin Ross
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 
 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
diff --git a/streak_stats.r b/streak_stats.r
 # function that returns basic streak statistics given a sequence
 streak_stats = function(results_sequence, streak_length){
  n_trials = length(results_sequence)
  n_success = sum(results_sequence)
  
  run_lengths = rle(results_sequence)$lengths
  if_run_success = rle(results_sequence)$values
  n_runs = length(run_lengths)
  
  # runs of successes
  longest_run_success = max(run_lengths*if_run_success)
  n_runs_success = sum(if_run_success)
  
  # successes after streaks of Successes
  run_lengths_adj = run_lengths - streak_length + 1
  run_values_adj_S =
    if_run_success *
    (!((if_run_success == 1) &
         (run_lengths < streak_length)))
  n_after_Sstreak = sum(run_lengths_adj * run_values_adj_S) -
    run_values_adj_S[length(run_values_adj_S)]
  n_success_after_Sstreak = sum((run_lengths - streak_length) * run_values_adj_S)
  
  # runs of failures
  longest_run_failure = max(run_lengths*(1-if_run_success))
  n_runs_failure = sum(1-if_run_success)
  
  # successes after streaks of Failures
  run_values_adj_F =
    (1-if_run_success) *
    (!((if_run_success == 0) &
         (run_lengths < streak_length)))
  n_after_Fstreak = sum(run_lengths_adj * run_values_adj_F) -
    run_values_adj_F[length(run_values_adj_F)]
  n_success_after_Fstreak = n_after_Fstreak -
    sum((run_lengths - streak_length) * run_values_adj_F)
  
  # Proportion of hits on trials after streaks
  phat_after_Sstreak = n_success_after_Sstreak / n_after_Sstreak
  
  # Difference in proportion of hits (trials after streaks - all other trials)
  phat_after_no_Sstreak = (n_success - n_success_after_Sstreak)/
    (n_trials - n_after_Sstreak)
  phat_Sstreak_vs_others = phat_after_Sstreak - phat_after_no_Sstreak
  
  # Difference in proportion of hits (trials after hit streaks - trials after miss streaks)
  phat_after_Fstreak = n_success_after_Fstreak / n_after_Fstreak
  phat_Sstreak_vs_Fstreak = phat_after_Sstreak - phat_after_Fstreak
  
  # Hit streak frequency - proportion of trials that occur after hit streaks
  Sstreak_frequency  = n_after_Sstreak / (n_trials - streak_length)
  
  # collect the streak stats for the results sequence
  return(data.frame(phat_after_Sstreak,
                    phat_Sstreak_vs_others,
                    phat_Sstreak_vs_Fstreak,
                    Sstreak_frequency,
                    longest_run_success,
                    n_runs
                    #                     n_runs_success,
                    #                     phat_after_Fstreak,
                    #                     n_runs_failure,
                    #                     longest_run_failure,
  ))
 }
	Hot Hand Phenomenon Shiny App

	Base R code created by Kevin Ross
	Shiny app files created by Kevin Ross

	Cal Poly Statistics Dept Shiny Series
	http://statistics.calpoly.edu/shiny
	# Shiny app to accompany
	# Ross, K.J. (2016) "Classroom Investigations of Recent Research
	# Concerning the Hot Hand Phenomenon". Working paper, Oct 2016.

	if (!require("shiny")) install.packages("shiny")
	if (!require("shinyBS")) install.packages("shinyBS")
	if (!require("plyr")) install.packages("plyr")
	if (!require("ggplot2")) install.packages("ggplot2")

	library(shiny)
	library(shinyBS)
	library(plyr)
	library(ggplot2)

	# function that returns basic streak statistics given a sequence
	source("streak_stats.r")

	# List of descriptions of streak statistics
	streak_stat_options = as.list(c("Proportion of S after streaks of S" = "phat_after_Sstreak",
	"Difference in proportion of S (after streaks of S - other trials)" = "phat_Sstreak_vs_others",
	"Difference in proportion of S (after streaks of S - after streaks of F)" = "phat_Sstreak_vs_Fstreak",
	"Frequency of S streaks" = "Sstreak_frequency",
	"Longest run of S" = "longest_run_success",
	"Total number of runs" = "n_runs"
	# "Number of runs of S" = "n_runs_success",
	# "Proportion of S after streaks of F" = "phat_after_Fstreak",
	# "Number of runs of F" = "n_runs_failure",
	# "Longest run of F" = "longest_run_failure"
	))

	# global constants
	max_n_repetitions = 20000 # maximum number of repetitions
	max_n_dots = 113 # maximum number of dots to show before histogram
	max_streak_length = 7 # maximum alloable streak length
	observed_color = "#ffa500" # orange
	pvalue_color = "#00bfff" # skyblue
	`%then%` <- shiny:::`%OR%` # for Shiny validate (see validate help)


	ui <- fluidPage(
	theme = "bootswatch-cerulean.css",
	titlePanel("Randomization-based Analysis of the Hot Hand Phenomenon"),
	sidebarLayout(
	sidebarPanel(
	selectInput(inputId = "statistic",
	label = "Choose a streak statistic:",
	choices = streak_stat_options,
	selected = "phat_after_Sstreak"
	),
	numericInput(inputId = "streak_length",
	label = "Define streak length:",
	value = 1,
	min = 1, max = max_streak_length, step = 1),
	radioButtons(inputId = "input_type",
	label = "Select method to input observed data:",
	choices = c("Input results sequence"= "input_data",
	"Input summary statistics"="input_stat"),
	selected = "input_data"),
	conditionalPanel(
	condition = "input.input_type == 'input_stat'",
	textInput(inputId = "n_trials",
	label = "Number of trials:",
	value = ""),
	textInput(inputId = "n_success",
	label = "Total number of successes:", value = ""),
	textInput(inputId = "observed_stat",
	label = "Observed value of streak statistic:",
	value = "")
	),
	conditionalPanel(
	condition = "input.input_type == 'input_data'",
	textInput(inputId = "observed_data",
	label = "Enter observed results sequence",
	value = "")
	),
	actionButton(inputId = "run",
	label="Accept inputs and initialize simulation"),
	tags$hr(style = "border-color: #000000;"),
	numericInput(inputId = "n_repetitions",
	label = "Number of shuffles to simulate:",
	value = 1,
	min = 1, max = max_n_repetitions),
	actionButton(inputId = "more_repetitions",
	label="Shuffle"),
	checkboxInput(inputId = "compute_pvalue",
	label = "Compute p-value",
	value = FALSE),
	checkboxInput(inputId = "recent_shuffle",
	label = "Show most recent shuffle",
	value = FALSE),
	actionButton(inputId = "reset",
	label = "Clear plot"),
	div("Shiny app by",
	a(href="mailto:[email protected]",target="_blank",
	"Kevin Ross"),align="right", style = "font-size: 8pt"),
	div("Base R code by",
	a(href="mailto:[email protected]",target="_blank",
	"Kevin Ross"),align="right", style = "font-size: 8pt"),
	div("Shiny source files:",
	a(href="https://gist.github.com/calpolystat/d40a02fa87508ac5ac4b",
	target="_blank","GitHub Gist"),
	align="right", style = "font-size: 8pt"),
	div(a(href="http://www.statistics.calpoly.edu/shiny",target="_blank",
	"Cal Poly Statistics Dept Shiny Series"),
	align="right", style = "font-size: 8pt")
	),
	mainPanel(
	tabsetPanel(tabPanel(title = "Simulation app",
	tags$b(tags$h4("Observed data",
	style = paste("color: ",
	observed_color, ";"))),
	tableOutput(outputId = "summary_stats"),
	# bsAlert(anchorId = "alert_input_type"),
	# bsAlert(anchorId = "alert_input_data"),
	# bsAlert(anchorId = "alert_input_streak"),
	bsAlert(anchorId = "alert_update_stat"),
	tags$hr(style = "border-color: #000000;"),
	bsAlert(anchorId = "alert_nrep"),
	# bsAlert(anchorId = "alert_click_button"),
	tags$b(tags$h4(textOutput(outputId = "plot_title"),
	style = "text-align: center;")),
	textOutput(outputId = "recent_shuffle"),
	textOutput(outputId = "recent_stat"),
	plotOutput(outputId = "null_plot"),
	tags$b(tags$h4(textOutput(outputId = "observed_stat"),
	style = paste("color: ",
	observed_color, ";"))),
	tags$b(tags$h4(textOutput(outputId = "pvalue"),
	style = paste("color: ",
	pvalue_color, ";")))
	),
	tabPanel(title = "Instructions and notes",
	includeHTML("instructions.html")),
	tabPanel(title = "References",
	includeHTML("references.html")),
	id = "current_tab",
	selected = "Simulation app"
	)
	)
	)
	)

	server <- function(input, output, session){

	##### Validate inputs

	# choose statistic - just read input
	streak_stat_name = reactive({
	return(names(streak_stat_options)[which(streak_stat_options == input$statistic)])
	})

	observeEvent(c(input$statistic, input$streak_length), {
	if (input$input_type == 'input_stat' && input$observed_stat != ""){
	createAlert(session, anchorId = "alert_update_stat",
	content = "Be sure to update the value of the observed statistic.")
	}
	})

	# length of streak - check integer between 1 and 7
	streak_length = reactive({
	validate(
	need(try(input$streak_length%%1 == 0),
	"Streak length must be a whole number")%then%
	need(try(input$streak_length >= 1),
	"Streak length must be at least 1")%then%
	need(try(input$streak_length <= max_streak_length),
	paste("Streak length cannot be more than ",max_streak_length))
	)
	return(input$streak_length)
	})

	# input data method - check that sequence is valid (just 1s and 0s)
	observed_results = reactive({
	if (input$input_type == 'input_data'){
	x = unlist(strsplit(input$observed_data, split=","))
	validate(
	need(setequal(x, c("0","1")),
	"Please enter observed results (1 for success, 0 for failure)
	in sequence separated by commas
	(e.g. 0,1,1,1,0,0,1,0,1,1)")
	)
	return(as.numeric(x))
	}
	})

	# number of trials - check positive integer greater than streak length
	n_trials = reactive({
	if (input$input_type == 'input_stat'){
	validate(
	need(input$n_trials != "",
	"Please input number of trials")%then%
	need(try(as.numeric(input$n_trials) > streak_length()),
	"Number of trials must be greater than streak length")%then%
	need(try(as.numeric(input$n_trials)%%1 == 0),
	"Number of trials must be a whole number")
	)
	return(as.numeric(input$n_trials))
	}else{
	return(length(observed_results()))
	}
	})

	# number of successes - check positive integer less than number of trials
	n_success = reactive({
	if (input$input_type == 'input_stat'){
	validate(
	need(input$n_success != "",
	"Please input observed number of successes")%then%
	need(try(as.numeric(input$n_success) < n_trials()),
	"Number of successes must be less than number of trials")%then%
	need(try(as.numeric(input$n_success) >= streak_length()),
	"Number of successes must be greater than streak length")%then%
	need(try(as.numeric(input$n_success)%%1 == 0),
	"Number of successes must be a whole number")
	)
	return(as.numeric(input$n_success))
	}else{
	return(sum(observed_results()))
	}
	})


	#### Calculations

	# input data method - compute observed stats
	observed_streak_stats <- reactive({
	if (input$input_type == 'input_data'){
	return(streak_stats(observed_results(), streak_length()))
	}
	})

	# compute observed value of selected streak stat
	observed_stat <- reactive({
	if (input$input_type == 'input_data'){
	x = observed_streak_stats()[[input$statistic]]
	}else{
	x = as.numeric(input$observed_stat)
	}
	return(x)
	})

	# invalidate the simulation when any of the inputs change
	is_sim_valid <- reactiveValues(yes = TRUE)

	observeEvent(input$input_type, {
	is_sim_valid$yes = FALSE
	createAlert(session, anchorId = "alert_input_type",
	content = "Enter data and click Accept Inputs.")
	})

	observeEvent(c(input$n_trials, input$n_success, input$observed_data), {
	is_sim_valid$yes = FALSE
	createAlert(session, anchorId = "alert_input_data",
	content = "Observed data has changed. Click Accept Inputs.")
	})

	observeEvent(input$streak_length, {
	is_sim_valid$yes = FALSE
	createAlert(session, anchorId = "alert_input_streak",
	content = "Streak length has changed. Click Accept Inputs.")
	})

	# Simulate random permutations
	# Given # of trials and # of successes
	random_permutations = eventReactive(input$run, {
	is_sim_valid$yes = TRUE
	x = c(rep(1, n_success()), rep(0, n_trials() - n_success()))
	y = matrix(
	replicate(max_n_repetitions,
	sample(x, size = n_trials(), replace = F)),
	byrow=TRUE, nrow=max_n_repetitions)
	return(y)
	})

	# compute streak statistics for the random permutations
	null_dist = eventReactive(input$run, {
	withProgress(message = "Initializing simulation, please wait.",
	detail = "When this message disappears you can use the simulate shuffles button.",
	{
	x = as.data.frame(
	adply(random_permutations(),
	1, streak_stats, streak_length = streak_length()))
	})
	return(x)
	})

	# update counter for producing the plot
	total_nrep <- reactiveValues(current = 0)
	observeEvent(input$more_repetitions, {
	if (is_sim_valid$yes == FALSE){
	createAlert(session, anchorId = "alert_click_button",
	content = "You must click the Accept Inputs
	button before simulating values.")
	total_nrep$current = 0
	}else{
	total_nrep$current = isolate(input$n_repetitions) +
	isolate(total_nrep$current)
	if (total_nrep$current > max_n_repetitions){
	total_nrep$current = max_n_repetitions
	createAlert(session, anchorId = "alert_nrep",
	content = paste("Note: ", max_n_repetitions,
	"is the maximum number of repetitions
	the app will run"))
	}
	}
	})

	# Reset the simulation when inputs change
	observe({
	input$run
	input$reset
	input$streak_length
	input$n_trials
	input$n_success
	input$observed_data
	total_nrep$current = 0
	})

	# all simulated values of the current statistic
	all_values <- reactive({
	y = null_dist()[[input$statistic]]
	return(y)
	})

	# values to plot; plot is updated incrementally
	plot_values <- reactive({
	if (total_nrep$current > 0){
	y = all_values()[1:total_nrep$current]
	return(y[!is.na(y)])
	}
	})

	# compute p-value (and numerator and denominator)
	pvalue <- reactive({
	if (input$statistic == "n_runs"){
	x = sum(plot_values() <= observed_stat())
	}else{
	x = sum(plot_values() >= observed_stat())
	}
	return(c("nrep" = length(plot_values()),
	"count" = x,
	"approx" = round(x / length(plot_values()), 4)))
	})

	#### Outputs

	# Table of summary stats
	output$summary_stats <- renderTable({
	x = data.frame(
	c("Number of trials", "Number of successes", streak_stat_name()),
	c(n_trials(), n_success(), observed_stat())
	)
	colnames(x) = c("Statistic", "Observed value")
	return(x)
	},
	include.rownames = FALSE, display = c("d","s","f") #, digits = 2
	)

	# Title of the plot of the null distribution (and pvalue calc)
	output$plot_title <- renderText({
	if (total_nrep$current>0){
	paste("Null distribution of streak statistic: ",
	streak_stat_name())}
	})

	# Most recent shuffle
	output$recent_shuffle <- renderText({
	if (input$recent_shuffle && total_nrep$current>0){
	paste("Most recent shuffle: ",
	toString(random_permutations()[total_nrep$current, ]))
	}
	})

	# Most recent value of streak statistic
	output$recent_stat <- renderText({
	if (input$recent_shuffle && total_nrep$current>0){
	if (!is.na(all_values()[total_nrep$current])){
	paste("Value of streak statistic for this shuffle: ",
	round(all_values()[total_nrep$current],4))
	}else{
	paste("Value of streak statistic cannot be computed for this shuffle\n
	since there are no streaks of length", streak_length())
	}
	}
	})

	# The plot of the null distribution and p-value
	output$null_plot <- renderPlot({
	x_axis_limits = range(c(all_values(),observed_stat()), na.rm=TRUE)*c(0.95,1.05) ### Need c(.95, 1.05)???
	if (total_nrep$current>0){
	max_y_dots = all_values()[!is.na(all_values())]
	max_y_dots = max_y_dots[1:max_n_dots]
	if (input$statistic == "n_runs"){
	color_value = plot_values() <= observed_stat()
	}else{
	color_value = plot_values() >= observed_stat()
	}
	plot_data = data.frame(plot_value = plot_values(), color_value)
	null_plot = ggplot(data=plot_data, aes(x=plot_value, fill=color_value))
	if (pvalue()[["nrep"]] < max_n_dots){
	null_plot = null_plot +
	geom_dotplot(dotsize = .4,
	na.rm = TRUE,
	binwidth = (x_axis_limits[2]-x_axis_limits[1])/30) +
	scale_y_continuous(expand = c(0.01, 0.01),limits=c(0,max_n_dots/2), name="", breaks = NULL)
	}else{
	null_plot = null_plot +
	geom_histogram(colour="black",
	binwidth = (x_axis_limits[2]-x_axis_limits[1])/50) +
	scale_y_continuous(expand = c(0.01, 0.01))
	}
	if (input$compute_pvalue && !is.na(pvalue()[["approx"]]) && !is.na(observed_stat())){
	if (input$statistic == "n_runs"){
	shade_min = -Inf
	shade_max = observed_stat()
	}else{
	shade_min = observed_stat()
	shade_max = Inf
	}
	pvalue_fill_scale <- scale_fill_manual(
	name = "pvalue_color",
	values = as.character(c("FALSE"="white", "TRUE"=pvalue_color)),
	limits = c("FALSE","TRUE")
	)
	null_plot = null_plot +
	pvalue_fill_scale +
	annotate("rect", xmin = shade_min, xmax = shade_max, ymin = -Inf, ymax = 0, fill=pvalue_color, color=pvalue_color) +
	geom_vline(xintercept = observed_stat(), color = observed_color, linetype="dashed", size = 1.5)
	}else{
	null_plot = null_plot + scale_fill_manual(values=c("white","white"))
	}
	null_plot = null_plot +
	annotate("text", x=Inf, y = Inf,
	label = paste("Median = ", round(median(plot_values()),3),
	"\n Mean = ", round(mean(plot_values()),3),
	"\n SD = ", round(sd(plot_values()),3)),
	vjust=1, hjust=1) +
	coord_cartesian(xlim=c(x_axis_limits)) +
	ggtitle(paste("Based on ",length(plot_values())," simulated values
	\n (resulting from ", total_nrep$current," repetitions of the simulation)")) +
	theme(plot.title = element_text(hjust = 0.5)) +
	labs(x="Streak statistic",y="Frequency") +
	theme(legend.position = "none",
	panel.grid.major = element_blank(),
	panel.grid.minor = element_blank(),
	panel.background = element_blank(),
	axis.line.x = element_line(color="black", size = 0.5),
	axis.line.y = element_line(color="black", size = 0.5),
	plot.title = element_text(face="bold", size=14),
	axis.title = element_text(face="bold", size=14))
	print(null_plot)
	}
	})

	# Output value of observed statistic for plot
	output$observed_stat <- renderText({
	if (input$compute_pvalue){
	if (is.na(observed_stat()) & input$input_type == 'input_data'){
	paste("Warning: observed value of the streak statistic cannot be computed
	because there are no trials following a streak of length ",
	streak_length(), " in the observed data")
	}else if (is.na(observed_stat()) & input$input_type == 'input_stat'){
	paste("Please enter the observed value of the streak statistic")
	}else{ ####
	paste("Observed value of streak statistic = ", round(observed_stat(),4))
	}
	}
	})

	# Output the p-value calculation
	output$pvalue <- renderText({
	if (input$compute_pvalue){
	if (is.na(observed_stat())){
	paste("The p-value cannot be computed because the observed statistic cannot be computed")
	}else if (pvalue()[["nrep"]] == 0){
	paste("There must be at least one simulated value of the streak statistic in order to compute an approximate p-value")
	}else{
	paste("Simulated p-value = ", pvalue()[["count"]], "/",
	pvalue()[["nrep"]], " = ",
	pvalue()[["approx"]])
	}
	}
	})
	}

	shinyApp(ui = ui, server = server)
	Title: Hot Hand Phenomenon
	Author: Kevin Ross
	AuthorUrl: https://statistics.calpoly.edu
	License: MIT
	DisplayMode: Normal
	Tags: Simulation-based inference, Hot hand in basketball, Hot hand fallacy
	Type: Shiny
	The MIT License (MIT)

	Copyright (c) 2017 Kevin Ross

	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	THE SOFTWARE.
	# function that returns basic streak statistics given a sequence
	streak_stats = function(results_sequence, streak_length){
	n_trials = length(results_sequence)
	n_success = sum(results_sequence)

	run_lengths = rle(results_sequence)$lengths
	if_run_success = rle(results_sequence)$values
	n_runs = length(run_lengths)

	# runs of successes
	longest_run_success = max(run_lengths*if_run_success)
	n_runs_success = sum(if_run_success)

	# successes after streaks of Successes
	run_lengths_adj = run_lengths - streak_length + 1
	run_values_adj_S =
	if_run_success *
	(!((if_run_success == 1) &
	(run_lengths < streak_length)))
	n_after_Sstreak = sum(run_lengths_adj * run_values_adj_S) -
	run_values_adj_S[length(run_values_adj_S)]
	n_success_after_Sstreak = sum((run_lengths - streak_length) * run_values_adj_S)

	# runs of failures
	longest_run_failure = max(run_lengths*(1-if_run_success))
	n_runs_failure = sum(1-if_run_success)

	# successes after streaks of Failures
	run_values_adj_F =
	(1-if_run_success) *
	(!((if_run_success == 0) &
	(run_lengths < streak_length)))
	n_after_Fstreak = sum(run_lengths_adj * run_values_adj_F) -
	run_values_adj_F[length(run_values_adj_F)]
	n_success_after_Fstreak = n_after_Fstreak -
	sum((run_lengths - streak_length) * run_values_adj_F)

	# Proportion of hits on trials after streaks
	phat_after_Sstreak = n_success_after_Sstreak / n_after_Sstreak

	# Difference in proportion of hits (trials after streaks - all other trials)
	phat_after_no_Sstreak = (n_success - n_success_after_Sstreak)/
	(n_trials - n_after_Sstreak)
	phat_Sstreak_vs_others = phat_after_Sstreak - phat_after_no_Sstreak

	# Difference in proportion of hits (trials after hit streaks - trials after miss streaks)
	phat_after_Fstreak = n_success_after_Fstreak / n_after_Fstreak
	phat_Sstreak_vs_Fstreak = phat_after_Sstreak - phat_after_Fstreak

	# Hit streak frequency - proportion of trials that occur after hit streaks
	Sstreak_frequency = n_after_Sstreak / (n_trials - streak_length)

	# collect the streak stats for the results sequence
	return(data.frame(phat_after_Sstreak,
	phat_Sstreak_vs_others,
	phat_Sstreak_vs_Fstreak,
	Sstreak_frequency,
	longest_run_success,
	n_runs
	# n_runs_success,
	# phat_after_Fstreak,
	# n_runs_failure,
	# longest_run_failure,
	))
	}