# line 1
# Look at the null hypothesis that the
# difference of two proportions in two
# populations is zero against the alternative
# hypothesis that p_1 - p_2 > 0.
#
# For this example we will use a 3% level of
# significance and we will look at the proportion
# of 3's in the two populations where p_1 is the proportion
# of 3's in population one and p_2 is the proportion
# of 3's in population two.
#
# We will start by generating two populations
source("../gnrnd5.R")
# NOTE(review): gnrnd5() appears to leave its generated values in
# the global variable L1 (the code reads L1 right after each call),
# so the first result is copied into L2 before the second call
# replaces L1.  Population one is L1, population two is L2.
gnrnd5(94472799907, 54367637)
L2 <- L1
head(L2, 10)
tail(L2, 10)
length(L2)
#
gnrnd5(45861699907, 25457427)
head(L1, 14)
tail(L1, 14)
length(L1)
#
# Now that we have our two populations
# we need to take random samples of the
# two populations
#
# ############################################
# ##  Each time we do the following steps   ##
# ##  we will get different samples and as  ##
# ##  such we will get different test       ##
# ##  results.                              ##
# ############################################
n_1 <- 95   # get a sample of size 95 from L1
# Pick n_1 positions uniformly, with replacement, from all of L1.
# The range is taken from length(L1) so the sampling stays correct
# whatever size the generated population turns out to be.
index_1 <- sample.int(length(L1), n_1, replace = TRUE)
index_1
samp_1 <- L1[index_1]
samp_1
#
n_2 <- 89   # get a sample of size 89 from L2
# Same approach for the second population.
index_2 <- sample.int(length(L2), n_2, replace = TRUE)
index_2
samp_2 <- L2[index_2]
samp_2
#
# Then we need to find the frequency of the value
# 3 in each of the two samples.
table_one <- table(samp_1)
table_one
# Count the 3's directly from the sample.  Indexing the table by
# position (table_one[3]) only gives the count of 3's when both
# smaller values also occur in the sample; otherwise it silently
# returns the count of some other value (or NA).
x_1 <- sum(samp_1 == 3)
x_1
table_two <- table(samp_2)
table_two
x_2 <- sum(samp_2 == 3)
x_2
# and we need to get the proportion of 3's in each sample
prop_1 <- x_1 / n_1
prop_1
prop_2 <- x_2 / n_2
prop_2
#
# Then our test statistic, p_1 - p_2, is
#
test_stat <- prop_1 - prop_2
test_stat
#
# the standard deviation of the test statistic under
# the null hypothesis is given by first finding phat
# as the pooled proportion
phat <- (x_1 + x_2) / (n_1 + n_2)
# then the standard deviation is
#    sqrt( phat(1-phat)/n_1 + phat(1-phat)/n_2 )
stdev <- sqrt(phat * (1 - phat) / n_1 + phat * (1 - phat) / n_2)
stdev
#
# To find the critical value for this one-sided test
# we need the z value that has 3% of the area to its right.
z <- qnorm(0.03, lower.tail = FALSE)
z
# Then our critical high value will be z*stdev
z * stdev
# reject H0 if test statistic is greater than the
# critical value
#
# For the attained significance approach we will
# need to find out how strange it is to get our
# test statistic.
#
pnorm(test_stat, mean = 0, sd = stdev, lower.tail = FALSE)
#
# Reject H0 if the attained significance is less than
# our alpha level.
# We could have taken the shortcut and used the
# function that is provided.
#
source("../hypo_2popproport.R")
# NOTE(review): the fifth argument (33) is presumably the selector
# for the alternative hypothesis (p_1 - p_2 > 0) and the sixth
# (0.03) the significance level -- confirm against the definition
# in hypo_2popproport.R.
hypoth_2test_prop(x_1, n_1, x_2, n_2, 33, 0.03)
#
# ################################################
# ##  Now, highlight and rerun, over and over,  ##
# ##  the steps from "n_1 <- 95" through the    ##
# ##  hypoth_2test_prop() call, to get repeated ##
# ##  samples and thus repeated hypothesis      ##
# ##  tests.                                    ##
# ##  While you do this be aware that the true  ##
# ##  difference between the population         ##
# ##  proportions is 0.03667857.                ##
# ################################################