# Explore topic 13d
# First load all of the functions we will use
source( "../gnrnd5.R")
source( "../gnrnd4.R")
source( "../ci_prop.R")
# Topic 13d looks at generating a confidence
# interval for a population proportion
#
# Let us generate a population.
# gnrnd5 comes from ../gnrnd5.R (not shown here). Judging by the
# assignment that follows, it presumably leaves the values it
# generates in the global variable L1 -- confirm against gnrnd5.R.
gnrnd5(33658499907, 458465)
# put the population into big_pop
# (copy it now: L1 is used again below to hold the sample indices)
big_pop <- L1
# We can look at some of the population (the first 100 values)
head( big_pop, 100)
# Then, our interest is to get a 92% confidence interval
# for the proportion of 3's in the population.
# To do this we will take a sample
# of a certain size
samp_size <- 93
# then, just so that we can all get the same
# sample, generate the index values for
# a sample of that size.
# The sample size is worked into the first key as (samp_size-1)*100;
# the meaning of the remaining digits of both keys is defined
# in ../gnrnd4.R -- presumably a seed and the range of index values.
key1 <- 702370001+ (samp_size-1)*100
gnrnd4(key1, 500000001)
# display the index values that are now in L1
L1
# use those index values to pull the sample out of the population
this_sample <- big_pop[ L1 ]
# look at our sample
this_sample
# We can find the sample proportion of 3's.
# The table shows the count of every value in the sample.
table( this_sample )
# Count the 3's directly rather than copying the number from
# the table by hand, so that the lines below remain correct
# if the sample or samp_size is ever changed.
samp_count_3 <- sum( this_sample == 3 )
samp_count_3
samp_proport_3 <- samp_count_3/length( this_sample )
samp_proport_3
# Now, because we will use the sample proportion,
# if certain conditions are met, then we can
# use the normal approximation to the distribution
# of the proportions.
# The conditions are that n*p>=10 and n*(1-p)>=10
samp_size*samp_proport_3
samp_size*(1 - samp_proport_3)
# When those both pass (they do for the sample above) we can
# use the value sqrt( p*(1-p)/n ) for the standard deviation
# of our sample statistic, the standard error, in
# a normal distribution.
st_error <- sqrt( samp_proport_3*(1-samp_proport_3)/samp_size)
st_error
# The sample proportion is our point estimate.
# Set the confidence level in one place so that it can be
# changed easily. For a 92% confidence interval we need 4%
# on the outside of each side of the interval.
conf_level <- 0.92
alpha_over2 <- (1 - conf_level)/2
z_low <- qnorm( alpha_over2 )
z_low
z_high <- qnorm( 1 - alpha_over2 )
z_high
# we can find
# samp_proport_3 +/- z(alpha_over2)*st_error
# CI low value (z_low is negative, so this subtracts)
samp_proport_3 + z_low*st_error
# CI high value
samp_proport_3 + z_high*st_error
##### or we could have found the margin of error
MOE <- z_high*st_error
MOE
# and then found the limits for the
# confidence interval
samp_proport_3 - MOE # the low end
samp_proport_3 + MOE # the high end
### of course all of this could be done via
# our ci_prop function, giving it the sample size,
# the number of 3's in the sample, and the confidence level
ci_prop( samp_size, samp_count_3, conf_level)
##################################
# We could try this at a different confidence
# level. Just change the confidence level used above and
# then rerun the lines that follow it, or simply rerun
# the ci_prop call with the new level to get the new values.
####################################
# If we express the confidence level as a
# percent then we say that that percent of the
# confidence intervals that we generate
# using this methodology will contain the
# true proportion. That means, that at this point
# in running the script, I do not know if the
# 92% confidence interval that we generated,
# namely (0.2377, 0.4074 ) does or does not
# contain the true population proportion of 3's.
#
# Now work out the true proportion of 3's in the whole
# population and check whether it falls inside the interval.
# Start by tabulating every value in the population.
item_count <- table(big_pop)
item_count
# item_count[3] is the third entry of that table. It is the
# count of 3's as long as the table printed above lists
# 1, 2, 3 as its first three values.
true_proportion <- item_count[3] / length(big_pop)
true_proportion
# yes it is!
# This has been an illustration, but let us
# go through the process many times and
# see how many of the intervals that we generate this
# way contain the true proportion.
# First reset the confidence level and
# sample size just in case we want to change
# them later; the number of repetitions is set here too.
conf_level <- 0.92
samp_size <- 93
num_trials <- 10000
# Every sample is tabulated over the values that occur in the
# population. That way position 3 of the sample table is always
# the same category as item_count[3], which true_proportion was
# computed from, even when a small sample happens to contain
# none of one of the smaller values.
pop_values <- names( item_count )
# L3 records, for each trial, whether the interval caught
# the true proportion.
L3 <- character( num_trials )
for( i in seq_len( num_trials ) ) {
  # take a fresh random sample from the population
  this_sample <- sample( big_pop, samp_size )
  this_count <- table( factor( this_sample, levels = pop_values ) )
  # build the confidence interval from this sample
  this_ci <- ci_prop( samp_size,
                      this_count[3],
                      conf_level)
  # this_ci[1] and this_ci[2] are used as the low and high
  # ends of the interval
  if( this_ci[1] <= true_proportion &&
      true_proportion <= this_ci[2] ) {
    L3[i] <- "hit"
  } else {
    L3[i] <- "missed"
  }
}
# see how we did
table( L3 )
#########
# If we want we can do this again, and we
# can even change the values of conf_level
# and/or samp_size, set just before the loop, if we want.