# Look at finding a confidence interval for
# the difference of two proportions in two
# populations.
#
# For this example get a 96% confidence interval for
# the difference p_1 - p_2 where p_1 is the proportion
# of 4's in population one and p_2 is the proportion
# of 4's in population two.
#
# We will start by generating two populations.
# This script relies on gnrnd5() (defined in the sourced file)
# leaving its result in the variable L1, so the first result is
# copied to L2 before the second call replaces L1.
source("../gnrnd5.R")
gnrnd5(29472799907, 54363637)
L2 <- L1
head(L2, 10)
tail(L2, 10)
length(L2)
#
gnrnd5(40561699907, 35476337)
head(L1, 14)
tail(L1, 14)
length(L1)
#
# Now that we have our two populations
# we need to take random samples of the
# two populations
#
# ############################################
# ## Each time we do the following steps    ##
# ## we will get different samples and as   ##
# ## such we will get different confidence  ##
# ## intervals.                             ##
# ############################################
#
# runif(n, 1, N + 1) truncated by as.integer() yields indices
# in 1..N, each equally likely, so every draw is a random pick
# (with replacement) from a population of size N.  The upper
# bound comes from the population itself rather than from a
# hard-coded size, so it stays right if the gnrnd5() keys change.
n_1 <- 93 # get a sample of size 93 from L1
index_1 <- as.integer(runif(n_1, 1, length(L1) + 1))
index_1
samp_1 <- L1[index_1]
samp_1
#
n_2 <- 104 # get a sample of size 104 from L2
index_2 <- as.integer(runif(n_2, 1, length(L2) + 1))
index_2
samp_2 <- L2[index_2]
samp_2
#
# Then we need to find the frequency of the value
# 4 in each of the two samples.
# Tabulate each sample (for display) and count the 4's.
# The count is taken with sum(samp == 4) rather than by table
# position: table_one[4] is the FOURTH category of the table,
# which is the value 4 only when 1, 2 and 3 all occur in the sample.
# NOTE(review): assumes the populations are coded with the value 4
# itself, as the comments in this script state -- confirm.
table_one <- table(samp_1)
table_one
x_1 <- sum(samp_1 == 4)
x_1
table_two <- table(samp_2)
table_two
x_2 <- sum(samp_2 == 4)
x_2
# and we need to get the proportion of 4's in each sample
prop_1 <- x_1 / n_1
prop_1
prop_2 <- x_2 / n_2
prop_2
#
# Then our best point estimate for p_1 - p_2 is
# prop_1 - prop_2
pe <- prop_1 - prop_2
#
# the standard deviation of the test statistic
# (the standard error of prop_1 - prop_2) is given by
# sqrt( prop_1*(1-prop_1)/n_1 + prop_2*(1-prop_2)/n_2 )
stdev <- sqrt(prop_1 * (1 - prop_1) / n_1 + prop_2 * (1 - prop_2) / n_2)
stdev
#
# since we want a 96% confidence interval we want to
# find the z value that has 2% above it
z <- qnorm(0.02, lower.tail = FALSE)
z
# then the low end of the confidence interval is
pe - z * stdev
# and the high end of the confidence interval is
pe + z * stdev
#
#
# We could have taken the shortcut and used the
# function that is provided.
#
source("../ci_2popproport.R")
ci_2popproportion(n_1, x_1, n_2, x_2, 0.96)
#
# ################################################
# ## Now, highlight and rerun, over and over,   ##
# ## everything from the line n_1 <- 93 through ##
# ## the ci_2popproportion() call, to get       ##
# ## repeated samples and thus, repeated 96%    ##
# ## confidence intervals. While you do this    ##
# ## be aware that the true difference between  ##
# ## the population proportions is 0.02448214.  ##
# ## How often do your intervals contain the    ##
# ## true difference?                           ##
# ################################################