# Explore topic 13e
# First load all of the functions we will use
source( "../gnrnd5.R")
source( "../gnrnd4.R")
source( "../pop_sd.R")
source( "../ci_stddev.R")
# Topic 13e looks at generating a confidence
# interval for a population standard deviation
#
# Let us generate a population
gnrnd5(145898499904, 432003526)
# put the population into big_pop
big_pop <- L1
# We can look at some of the population
head( big_pop, 20)
# Then, our interest is to get a 97% confidence interval
# for the standard deviation of the population.
# To do this we will take a sample
# of a certain size
samp_size <- 43
# then, just so that we can all get the same
# sample, generate the index values for
# a sample of that size
key1 <- 550910001+ (samp_size-1)*100
gnrnd4(key1, 500000001)
L1
this_sample <- big_pop[ L1 ]
# look at our sample
this_sample
# we can find the sample standard deviation
samp_sd <- sd( this_sample )
samp_sd
# Now, we will use the sample standard deviation as
# our point estimate. We know that the distribution
# of the ratio of (samp_size-1)*samp_sd^2 to
# sigma^2, where sigma is the standard deviation of
# the population will be a chi-squared
# distribution with samp_size-1 degrees of freedom.
# Therefore, for a 97% confidence interval we want to find
# the x_left value that has (1-0.97)/2 = 0.015 as the
# area to the left of that value, for samp_size-1
# degrees of freedom and the value x_right that has
# 0.015 as the area to the right of that value.
x_left <- qchisq( 0.015, samp_size-1 )
x_left
x_right <- qchisq( 0.015, samp_size-1, lower.tail=FALSE )
x_right
# then the left side of the confidence interval
# is
left_side <- sqrt((samp_size-1)*samp_sd^2/x_right)
# and the right side of the confidence interval
# is
right_side <- sqrt((samp_size-1)*samp_sd^2/x_left)
#
left_side
right_side
#
### of course all of this could be done via
# our ci_known function
ci_stddev(samp_size, samp_sd, 0.97)
##################################
# we could try this at a different confidence
# level. Just alter the confidence level and
# then run the subsequent lines, or just skip
# down to line 72 and get the new values
####################################
# If we express the confidence level as a
# percent then we say that that percent of the
# confidence intervals that we generate
# using this methodology will contain the
# true standard deviation. That means, that at this point
# in running the script, I do not know if the
# 97% confidence interval that we generated,
# namely (35.657, 57.607 ) does or does not
# contain the true population standard deviation.
#
# Let us find the true standard deviation and see if it is
# in the interval.
sigma <- pop_sd( big_pop )
sigma
# yes it is!
# This has been an illustration, but let us
# go through the process 10000 times and
# see how many intervals that we generate this
# way contain the true mean
# first reset the confidence level and
# sample size just in case we want to change
# them later
conf_level <- 0.97
samp_size <- 43
L3 <- 1:10000
for( i in 1:10000 ) {
this_sample <- sample( big_pop, samp_size )
samp_sd <- sd( this_sample )
this_ci <- ci_stddev( samp_size,
samp_sd,
conf_level)
if( this_ci[1] <= sigma &
sigma <= this_ci[2] ) {
L3[i] = "hit"}
else {
L3[i] = "missed"}
}
# see how we did
table( L3 )
#########
# if we want we can do this again and we
# can even change the values in lines 107
# and/or 108 if we want.