# Look at finding a confidence interval for
# the difference of two means when the population
# standard deviations are known.
#
# For this example get a 93% confidence interval for
# the difference mu_1 - mu_2.
#
# We will start by generating two populations.
# As used here, each gnrnd5() call leaves its generated values in
# the global variable L1, so the first population is copied to L2
# before L1 is regenerated for the second one.
source("../gnrnd5.R")
gnrnd5(276932499901, 10025010176)
L2 <- L1
head(L2, 10)
tail(L2, 10)
source("../pop_sd.R")
sigma_2 <- pop_sd(L2)
sigma_2
length(L2)
#
gnrnd5(245961599901, 10413010732)
head(L1, 14)
tail(L1, 14)
sigma_1 <- pop_sd(L1)
sigma_1
length(L1)
#
# Now that we have our two populations
# and now that we know the standard deviation of each
# population, we need to take random samples of the
# two populations.
#
# ############################################
# ## each time we do the following steps    ##
# ## we will get different samples and as   ##
# ## such we will get different confidence  ##
# ## intervals.                             ##
# ############################################
#
# ---- begin resampling section ----
#
# runif() draws real values strictly between its two limits, and
# as.integer() truncates them, so using an upper limit of
# length(population) + 1 yields whole-number positions from 1 to
# length(population), each equally likely.  The same position may be
# drawn more than once.
n_1 <- 43 # get a sample of size 43 from L1
index_1 <- as.integer(runif(n_1, 1, length(L1) + 1))
index_1
samp_1 <- L1[index_1]
samp_1
#
n_2 <- 56 # get a sample of size 56 from L2
index_2 <- as.integer(runif(n_2, 1, length(L2) + 1))
index_2
samp_2 <- L2[index_2]
samp_2
#
# Then we need to find the mean of each sample
xbar_1 <- mean(samp_1)
xbar_1
xbar_2 <- mean(samp_2)
xbar_2
#
# So our best point estimate is
pnt_est <- xbar_1 - xbar_2
pnt_est
#
# The distribution of the difference of the means will
# be normal with standard deviation equal to
#    sqrt( sigma_1^2/n_1 + sigma_2^2/n_2 )
sd_difference <- sqrt(sigma_1^2 / n_1 + sigma_2^2 / n_2)
sd_difference
#
# Then, for a 93% confidence interval we need to
# have the 7% we are missing split evenly on both
# tails.  But, since this is a normal distribution
# the two z-values will just be opposites.  We will
# just find the upper value.
#
# The confidence level is kept in one place so that the hand
# computation and the ci_2known() call below always agree.
conf_level <- 0.93
alpha <- 1 - conf_level
z <- qnorm(alpha / 2, lower.tail = FALSE)
z
# then our confidence interval has a low end of
pnt_est - z * sd_difference
# and a high end of
pnt_est + z * sd_difference
#
# We could have taken the shortcut and used the
# function that is provided.
#
source("../ci_2known.R")
ci_2known(sigma_1, n_1, xbar_1, sigma_2, n_2, xbar_2, conf_level)
#
# ---- end resampling section ----
#
# ################################################
# ## Now, highlight and rerun, over and over,   ##
# ## the resampling section marked above, to    ##
# ## get repeated samples and thus, repeated    ##
# ## 93% confidence intervals.                  ##
# ## While you do this be aware that the true   ##
# ## difference between the population means    ##
# ## is 7.811552.  How often do your            ##
# ## intervals contain the true difference?     ##
# ################################################