# Look at the binomial probability distribution
################################################
## Important Note: Students are not expected ##
## to be able to come up with the ideas and ##
## programming examples demonstrated on this ##
## script. ##
################################################
#
# In the binomial situation we have:
# a) a fixed number of trials
# b) each trial has exactly two possible outcomes,
# a success or a failure
# c) the probability of success is the same for
# each trial
# d) the trials are independent
# e) we are looking at a random variable that
# is the number of successes in those
# trials
# Look at a case where we have 5 trials
# How many ways could we get 2 successes in those
# five trials (positions noted above items)
# 12345 12345 12345 12345
# SFFFS FSFFS FFSFS FFFSS
# SFFSF FSFSF FFSSF
# SFSFF FSSFF
# SSFFF
# so there are 10 ways.
# In the list of possible ways of getting
# 2 successes in 5 trials note the position
# of the successes. Translating those positions
# into a new list gives us
# 1 2 2 3 3 4 4 5
# 1 3 2 4 3 5
# 1 4 2 5
# 1 5
# We could see these same values by using
# The combinations function, but we need to
# install that.
install.packages("gtools")
library(gtools)
combinations(5,2,c(1,2,3,4,5))
#
# Again, we note that there are 10 different
# items that represent the ways we could get
# 2 successes in 5 trials. That number of
# different ways is the number of combinations
# of 5 things take 2 at a time. the function
# combinations(5,2,c(1,2,3,4,5)) lists all of
# them, but we just need to know the number
# of them. For that we could use nCr(5,2)
# or num_comb(5,2).
source("../combinations.R")
nCr(5,2) # is one alternative
num_comb(5,2) # is a different alternative
# Now, if the P(success) = p then
# P(failure) = (1-p)
# Next note that the probability of getting any
# one of the items with 2 successes and 3
# failures is p^2 * (1-p)^3
# Therefore, the probability of getting any of
# the 10 items is 10*p^2*(1-p)^3
#
# Or, more generally, the probability of getting
# exactly r successes in n trials when the
# probability of success on one trial is p is
# given by nCr(n,r)*p^r * (1-p)^(n-r)
# So, the long way to find the probability of
# getting exactly 4 successes in 10 trials when
# the probability of getting a success in one
# trial is 0.45 is given by
nCr(10,4)*0.45^4 * (1-0.45)^(10-4)
#
# We have a function that will compute this
source("../pbinomeq.R")
pbinomeq(4,10, 0.45 )
# Here are a few more examples
pbinomeq( 0, 10, 0.45) # P( 0 successes )
pbinomeq( 1, 10, 0.45) # P( 1 success )
pbinomeq( 2, 10, 0.45) # P( 2 successes )
pbinomeq( 3, 10, 0.45) # P( 3 successes )
###############################################
## Stop here and compare these results to ##
## the values shown in the binomial ##
## distribution table for 10 trials that is ##
## on the web page. ##
###############################################
# This is all well and good, but the usual question
# in this area is to find the probability of
# getting 3 or fewer successes in 10 trials where
# the probability of a success on one trial is 0.45.
#
# We could just add up the last four results as in
# P(X=0) + P(X=1) + P(X=2) + P(X=3) = P(X <= 3)
#
pbinomeq( 0, 10, 0.45) +
pbinomeq( 1, 10, 0.45) +
pbinomeq( 2, 10, 0.45) +
pbinomeq( 3, 10, 0.45)
#
# In the old days we would look this up in a table
# for the cumulative probabilities in binomial
# settings. (see the web page)
###############################################
## Stop here and compare these results to ##
## the values shown in the table on the ##
## web page. ##
###############################################
# R has a built-in function, pbinom(), that computes
# the cumulative probability for a binomial
# situation. So, P(X<=3) for 10 trials when the
# probability of success on a single trial is 0.45
# is
pbinom( 3, 10, 0.45)
# Of course, now that we have pbinom() we really
# do not need pbinomeq because for 13 trials with
# the probability of a single success being 0.423
# we can find P(X=6) by
pbinom(6,13,0.423) - pbinom(5,13,0.423)
# Why did that work?????????????????
# let us compare that to
pbinomeq( 6, 13, 0.423 )
# could we look this up on the table?
# In fact, look it up on both tables
##### pause to do the look ups
########################################
## Look at a whole range of problems that we
## can now do using pbinom()
## For all of these we will look at 13 trials
## and a probability of success on a single
## trial will be 0.393
# Find the probability of getting 7 or fewer successes
pbinom( 7, 13, 0.393)
# Find the probability of getting fewer than 4 successes
pbinom( 3,13, 0.393)
# Find the probability of getting more than 8 successes
1 - pbinom( 8, 13, 0.393 )
# Find the probability of getting between 5 and 8,
# inclusive, successes
pbinom( 8, 13, 0.393 ) # is P(X<=8)
pbinom( 4, 13, 0.393 ) # is P(X<=4)
# so the difference is P( 5 <= X <= 8)
pbinom( 8, 13, 0.393 ) - pbinom( 4, 13, 0.393 )
# Find the probability of getting less than 4 or
# more than 10 successes
pbinom( 3, 13, 0.393 ) + (1 - pbinom(10,13,0.393))
############ here is one new feature
# pbinom( 10,13,0.393) is P( X <= 10 )
#but pbinom( 10,13,0.393, lower.tail=FALSE) is
# P( X > 10 )
# Therefore the previous problem could have been
# answered with
pbinom( 3, 13, 0.393 ) +
pbinom( 10, 13, 0.393, lower.tail=FALSE )
############################################
## Look at the measures of the binomial ##
## distribution ##
############################################
# find the mean of a binomial distribution
# where we have 17 trials and the probability
# of success on a single trial is 0.2941
#
# The mean turns out to be n*p
17*0.2941 # is the mean
#
# We can verify this by applying the law of
# large numbers.
# Create a huge listing of outcomes from
# 0 to 17 where each value happens at the
# probability of getting that many successes out
# of 17 trials.
# We will get about 10000 outcomes
outcomes <- NULL
for (i in 0:17 ) {
expected_num <- pbinomeq(i,17,0.2941)*10000
expected_num <- round(expected_num,0)
outcomes <- c(outcomes, rep(i,expected_num))
}
# look at how many outcomes of each possible
# number of successes we have
table( outcomes )
# now find the mean of those outcomes
mean( outcomes )
# Now find the variance and the standard deviation
# of the same binomial distribution.
# The variance is n*p*(1-p)
17*0.2941*(1-0.2941)
# The standard deviation is sqrt(n*p*(1-p))
sqrt(17*0.2941*(1-0.2941))
#
# Compare those to our huge list of outcomes
source("../pop_sd.R")
pop_sd( outcomes ) # is the standard deviation
pop_sd( outcomes )^2 # is the variance