# A small demonstration of getting a
# confidence interval for the mean
# of a population with unknown standard deviation.
#
# First, we will get a population.
# In this case we will get a large population.
# NOTE(review): gnrnd5() presumably leaves the generated
# population in the global variable L1 (everything below
# reads L1 without assigning it) -- confirm in gnrnd5.R.
source("../gnrnd5.R")
gnrnd5(182651734104, 285002867)
# let us look at the head and tail values
head(L1)
tail(L1)
min(L1)
max(L1)
#
# Now, we could find the standard deviation of the
# population, but this is supposed to be an example
# of finding confidence intervals for the mean when
# we do not know the population standard deviation.
#
# just a quick look at L1
hist(L1)
boxplot(L1, horizontal = TRUE)
source("../assess_normality.R")
assess_normality(L1)
#
# L1 sure looks like a Normal distribution.
#
##########################
## Problem: find the 95% confidence interval
## for the mean of the population when we
## do not know the population standard deviation.
##########################

# ---- take a sample and build the interval ----
# Take a simple random sample of size n.
#
# Be careful: every time we do this we get
# a different random sample.
#
# The sample size is stated once so that the degrees of
# freedom (n - 1) and the sqrt(n) below cannot get out of step.
n <- 23
# runif() gives real values strictly between 1 and
# length(L1) + 1; as.integer() truncates them, so every
# index 1..length(L1) is equally likely.  The upper bound is
# taken from L1 itself rather than typed in as a constant, so
# no element is unreachable and no index runs past the end.
# Note that the same index can be drawn more than once.
L2 <- as.integer(runif(n, 1, length(L1) + 1))
# L2 holds the index values of our simple random sample
L2
L3 <- L1[L2]
# L3 holds the simple random sample
L3
# we will get the mean of L3
xbar <- mean(L3)
xbar
#
# and we will get the standard deviation of the sample
sx <- sd(L3)
sx
#
# Remember that the distribution of the sample means
# has the same mean as the population and a
# standard deviation equal to the population
# standard deviation divided by the square root of
# the sample size.  However, we do not know the population
# standard deviation, so we use the standard deviation of
# the sample divided by the square root of the size of the
# sample.  The standardized sample mean,
#    (xbar - mu) / (sx / sqrt(n)),
# then follows the Student's-t distribution with n-1
# degrees of freedom, in this case 22 degrees of freedom.
#
# The long way to generate the confidence interval
# is to find the t-value in a Student's-t
# distribution with n-1 degrees of freedom
# such that there is 95% of the
# area between -t and t.  That means that 2.5% is
# less than -t and 2.5% is greater than t.
# We can find that t value via qt.
#
# (Naming the value t does not break calls to the base
# function t(): R skips non-function objects when it looks
# up a function name.)
t <- qt(0.025, n - 1, lower.tail = FALSE)
t
#
# Then our margin of error is t*sx/sqrt(n)
#
moe <- t * sx / sqrt(n)
moe
#
# and our confidence interval is between
# xbar-moe and xbar+moe
#
xbar - moe
xbar + moe
#
# Of course, we could use the function
# ci_unknown() to do this in
# one easy step.
#
source("../ci_unknown.R")
ci_unknown(sx, n, xbar, 0.95)
#
#################################
# Go back and execute the statements from
# "take a sample and build the interval" through the
# ci_unknown() call many more times.
# Each time you get a different random sample.
# Therefore, each time you get a different
# confidence interval.  Note that the MOE changes
# each time because the standard deviation of the
# sample changes for each sample.  By the way, the
# true mean of the population is about 286.62002.
#################################