Created
November 6, 2017 20:47
-
-
Save dggoldst/134d34bef365c393f7ebbc70e0b0fe39 to your computer and use it in GitHub Desktop.
flips_streaks.R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(stringr) | |
library(ggplot2) | |
library(dplyr) | |
library(scales) | |
library(markovchain) | |
MAXSTREAKLEN=16 | |
#H/T https://math.stackexchange.com/questions/383704/probability-of-streaks for this soln | |
get_prob = function(streaklen,sequencelen) | |
{ | |
trans=matrix(0,nrow=streaklen,ncol=streaklen) | |
for(i in 1:streaklen) | |
{ | |
if(1==i) {trans[i,1:2]=.5} | |
else if(i<=(streaklen-1)) {trans[i,c(1,i+1)]=.5} | |
else if(i==streaklen) {trans[i,streaklen]=1} | |
else {stop("badness")} | |
} | |
mcFlips <- new("markovchain", | |
states = as.character(1:streaklen), | |
transitionMatrix = trans) | |
initialState <- c(1, rep(0,streaklen-1)) | |
(initialState * (mcFlips ^ (sequencelen-1)))[streaklen] | |
} | |
#MORE LIKELY THAN NOT (>50% likely) | |
answers50 = rep(0,MAXSTREAKLEN) | |
currseqlen = 2 | |
system.time({ | |
for(streaklen in 2:MAXSTREAKLEN) | |
{ | |
while(get_prob(streaklen,currseqlen)<=.5) { currseqlen = currseqlen+1 } | |
answers50[streaklen]=currseqlen | |
} | |
}) | |
#HIGHLY LIKELY (>99% likely) | |
answers99 = rep(0,MAXSTREAKLEN) | |
currseqlen = 2 | |
for(streaklen in 2:MAXSTREAKLEN) | |
{ | |
while(get_prob(streaklen,currseqlen)<=.99) { currseqlen = currseqlen+1 } | |
answers99[streaklen]=currseqlen | |
} | |
plot_data = data.frame( | |
streak_length = 1:16, | |
sequence_length_50 = answers50, | |
sequence_length_99 = answers99 | |
)[2:10,] | |
p=ggplot(plot_data,aes(streak_length,sequence_length_50,label=sequence_length_50)) | |
p=p+theme_bw() + | |
geom_bar(stat="identity",fill="lightblue") + | |
geom_text(aes(y=sequence_length_50+20)) + | |
scale_x_continuous(breaks=1:(nrow(plot_data)+1)) + | |
scale_y_continuous(breaks=seq(0,800,by=100)) + | |
labs( | |
x="Streak Length", | |
y="Number of Flips", | |
title="Minimum Number of Flips Needed for a Streak\nTo be More Likely Than Not", | |
subtitle="That is, streak probability greater than 50%" | |
) | |
p | |
ggsave(plot=p,file="flips_50.png",height=4,width=5,dpi=600) | |
### | |
plot_data = data.frame( | |
streak_length = 1:16, | |
sequence_length_50 = answers50, | |
sequence_length_99 = answers99 | |
)[2:10,] | |
p=ggplot(plot_data,aes(streak_length,sequence_length_99,label=comma(sequence_length_99))) | |
p=p+theme_bw() + | |
geom_line(alpha=.5,color="blue",aes(y=sequence_length_50)) + | |
geom_line(alpha=.5) + | |
geom_text(aes(y=sequence_length_99+400)) + | |
geom_text(aes(y=sequence_length_50-200,label=comma(sequence_length_50))) + | |
scale_x_continuous(breaks=1:(nrow(plot_data)+1)) + | |
labs( | |
x="Streak Length", | |
y="Number of Flips", | |
title="Minimum Number of Flips Needed for Streaks\nTo be Likely or Highly Likely", | |
subtitle="That is, streak probabilities greater than 50% and 99%" | |
) | |
p | |
ggsave(plot=p,file="flips_50_99.png",height=4,width=5,dpi=600) | |
#Log version | |
plot_data = data.frame( | |
streak_length = 1:16, | |
sequence_length_50 = answers50, | |
sequence_length_99 = answers99 | |
)[2:16,] | |
p=ggplot(plot_data,aes(streak_length,sequence_length_99,label=comma(sequence_length_99))) | |
p=p+theme_bw() + | |
geom_line(alpha=.5,color="blue",aes(y=sequence_length_50)) + | |
geom_line(alpha=.5) + | |
geom_text(aes(y=sequence_length_99*1.5),size=2.5) + | |
geom_text(aes(y=sequence_length_50*.5,label=comma(sequence_length_50)),size=2.5) + | |
scale_x_continuous(breaks=1:(nrow(plot_data)+1)) + | |
scale_y_log10(breaks=c(3,10,30,100,300,1000,3000,10000,30000,100000,300000),label=comma) + | |
#scale_y_continuous(breaks=seq(0,800,by=100)) + | |
labs( | |
x="Streak Length", | |
y="Number of Flips", | |
title="Minimum Number of Flips Needed for Streaks\nTo be Likely or Highly Likely", | |
subtitle="That is, streak probabilities greater than 50% and 99%" | |
) | |
p | |
ggsave(plot=p,file="flips_log.png",height=4,width=6,dpi=600) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment