# Explore topic 13c
# First load all of the functions we will use
source( "../gnrnd5.R")
source( "../gnrnd4.R")
source( "../pop_sd.R")
source("../assess_normality.R")
source( "../ci_unknown.R")
# Topic 13c looks at generating a confidence
# interval for a population mean when we do not
# know the population standard deviation.
#
# Let us generate a population
# Build the population. The values are picked up from L1
# right after the gnrnd5() call, so copy them into big_pop
# before L1 is used for anything else.
gnrnd5(145209499902, 3306001054)
big_pop <- L1
# Decide how many items go into the sample
samp_size <- 38
# So that we all draw the identical sample, derive the
# gnrnd4 key from the sample size and generate the
# index values for a sample of that size
key1 <- 702370001 + 100 * (samp_size - 1)
gnrnd4(key1, 500000001)
L1
# Pull the items at those index positions out of the population
this_sample <- big_pop[L1]
# display the sample
this_sample
# compute, then display, the sample mean and the
# sample standard deviation
samp_mean <- mean(this_sample)
samp_sd <- sd(this_sample)
samp_mean
samp_sd
# Now, because we will use the sample standard deviation
# to estimate the population standard deviation, the
# distribution of the sample means will be a
# Student's-t with (samp_size-1) degrees of freedom.
# then we make our confidence interval for
# some specified confidence level
conf_level <- 0.95
# The area left outside the interval is 1 - conf_level;
# half of it sits in each tail
t_over_2 <- (1 - conf_level) / 2
t_over_2
# The confidence interval is
#   samp_mean +/- t(alpha_over2) * samp_sd / sqrt(samp_size)
# where t comes from a Student's t with samp_size - 1
# degrees of freedom
t_score_low <- qt(t_over_2, df = samp_size - 1)
t_score_low
# same tail area, but measured in the upper tail, which
# gives the opposite of the low value
t_score_high <- qt(t_over_2, df = samp_size - 1, lower.tail = FALSE)
t_score_high
# standard error of the sample mean
st_error <- samp_sd / sqrt(samp_size)
st_error
# CI low value
samp_mean + t_score_low * st_error
# CI high value
samp_mean + t_score_high * st_error
##### alternatively, find the margin of error first
MOE <- t_score_high * st_error
MOE
# and then step that far to either side of the
# sample mean to get the confidence interval limits
samp_mean - MOE # the low end
samp_mean + MOE # the high end
### of course all of this could be done in one step via
# our ci_unknown function
ci_unknown(samp_sd, samp_size, samp_mean, conf_level)
##################################
# we could try this at a different confidence
# level. Just alter the value assigned to conf_level
# above and then rerun the subsequent lines, or just
# skip down to the ci_unknown() call and get the new values
####################################
# If we express the confidence level as a
# percent then we say that that percent of the
# confidence intervals that we generate
# using this methodology will contain the
# true mean. That means, that at this point
# in running the script, I do not know if the
# 95% confidence interval that we generated,
# namely (173.464, 213.299 ) does or does not
# contain the true mean.
#
# Let us find the true mean and see if it is
# in the interval.
true_mean <- mean(big_pop)
true_mean
# yes it is!
# This has been an illustration, but let us
# go through the process 10000 times and
# see how many of the intervals that we generate this
# way contain the true mean.
# First reset the confidence level and
# sample size, just in case we want to change
# them later.
conf_level <- 0.95
samp_size <- 38
# L3 records the outcome of each trial. It is allocated as a
# character vector up front so that storing "hit"/"missed"
# does not silently coerce a numeric vector to character.
L3 <- character(10000)
for (i in seq_along(L3)) {
  # draw a fresh random sample; no seed is set here, so the
  # counts will vary a little from run to run
  this_sample <- sample(big_pop, samp_size)
  this_ci <- ci_unknown(sd(this_sample), samp_size,
                        mean(this_sample),
                        conf_level)
  # this_ci[1] and this_ci[2] are treated as the low and high
  # ends of the interval; && is the scalar operator intended
  # for if() conditions
  if (this_ci[1] <= true_mean &&
      true_mean <= this_ci[2]) {
    L3[i] <- "hit"
  } else {
    L3[i] <- "missed"
  }
}
# see how we did
table(L3)
#########
# if we want we can do this again and we
# can even change the values assigned to conf_level
# and/or samp_size just before the loop if we want.