# Two-sample hypothesis test for the difference of two means
# when the population standard deviations are known.
#
# Look at testing the hypothesis that there is no
# difference between two means when the population
# standard deviations are known.
#
# For this example we will run the test at the 0.02
# level of significance and the alternative hypothesis
# is that the difference mu_1 - mu_2 != 0.
#
# The script has two parts:
#   Part 1: two populations whose true means differ by 1.163402
#   Part 2: two populations whose true means differ by 15.74752
#
# Helper files used (all live one directory up):
#   ../gnrnd5.R       defines gnrnd5(); as used here it leaves the
#                     generated values in the global variable L1
#   ../pop_sd.R       defines pop_sd()
#   ../hypo_2known.R  defines hypoth_2test_known()
#
# Nothing here calls set.seed(), so every run of a sampling
# section draws new samples.  That is intentional.

# Part 1: generate the two populations ----
#
# We will start by generating two populations
source("../gnrnd5.R")
gnrnd5(256472499901, 11225010176)
# gnrnd5() always fills L1, so copy this first population
# into L2 before the next call overwrites L1.
L2 <- L1
head(L2, 10)
tail(L2, 10)
source("../pop_sd.R")
sigma_2 <- pop_sd(L2)
sigma_2
length(L2)
#
gnrnd5(245402599901, 10413010732)
head(L1, 14)
tail(L1, 14)
sigma_1 <- pop_sd(L1)
sigma_1
length(L1)
#
# Now that we have our two populations
# and now that we know the standard deviation of each
# population, we need to take random samples of the
# two populations
#
# ############################################
# ##  each time we do the following steps   ##
# ##  we will get different samples and as  ##
# ##  such we will get a different          ##
# ##  test of the null hypothesis.          ##
# ############################################

# Part 1: sampling and test -- START of the rerun section ----
n_1 <- 37 # get a sample of size 37 from L1
# runif() gives real values strictly between 1 and
# length(L1) + 1; truncating them with as.integer() gives
# index values 1, 2, ..., length(L1), each equally likely.
# Index values can repeat (sampling with replacement).
index_1 <- as.integer(runif(n_1, 1, length(L1) + 1))
index_1
samp_1 <- L1[index_1]
samp_1
#
n_2 <- 49 # get a sample of size 49 from L2
index_2 <- as.integer(runif(n_2, 1, length(L2) + 1))
index_2
samp_2 <- L2[index_2]
samp_2
#
# Then we need to find the mean of each sample
xbar_1 <- mean(samp_1)
xbar_1
xbar_2 <- mean(samp_2)
xbar_2
#
# So our difference of the sample means is
samp_diff <- xbar_1 - xbar_2
samp_diff
#
# The distribution of the difference of the means will
# be normal with standard deviation equal to
#      sqrt( sigma_1^2/n_1 + sigma_2^2/n_2 )
sd_difference <- sqrt(sigma_1^2 / n_1 + sigma_2^2 / n_2)
sd_difference
#
# First the critical value approach:
#
# To run the test at the 0.02 level of significance
# have the 2% we are missing split evenly on both
# tails.  But, since this is a normal distribution
# the two z-values will just be opposites.  We will
# just find the upper value.
#
z <- qnorm(0.02 / 2, lower.tail = FALSE)
z
# then our low critical value will be -z*sd_difference
# and our high critical value will be z*sd_difference
#
# REJECT H0 if samp_diff is less than the low
# critical value or greater than the high value
#
# Second, the attained significance approach
#
# Because this is a two-tailed test we need to
# find how strange it is to get the difference of
# the means to be this strange, the value of samp_diff,
# or stranger.
#
# Because this is a dynamic section of the script
# samp_diff could be positive or negative.  Here
# we will use the fact that the normal distribution
# is symmetric so we can just use the absolute
# value of the difference of the sample means.
#
abs_diff <- abs(samp_diff)
#
# Then the question is how strange would it be to
# get that value or higher from a population that
# is Normal with mean=0 and standard deviation
# equal to sd_difference
#
attained <- pnorm(abs_diff, mean = 0, sd = sd_difference, lower.tail = FALSE)
attained
#
# Then, double that value, because this is a two-tailed
# test, and if the doubled value is less than our
# level of significance we reject H0.
attained * 2
# We could have taken the shortcut and used the
# function that is provided.
#
source("../hypo_2known.R")
# NOTE(review): the last two arguments appear to be the
# hypothesized difference (0) and the level of
# significance (0.02) -- confirm against ../hypo_2known.R
hypoth_2test_known(
  sigma_1, n_1, xbar_1,
  sigma_2, n_2, xbar_2,
  0, 0.02
)
# Part 1: sampling and test -- END of the rerun section ----
#
# #################################################
# ##  Now, highlight and rerun, over and over,   ##
# ##  the Part 1 sampling-and-test section       ##
# ##  (from n_1 <- 37 through the call to        ##
# ##  hypoth_2test_known), to get repeated       ##
# ##  samples and thus, repeated 2% tests of     ##
# ##  the null hypothesis that the means are     ##
# ##  the same.  While you do this be aware      ##
# ##  that the true difference between the       ##
# ##  population means is 1.163402.  How         ##
# ##  often do your tests reject the null        ##
# ##  hypothesis?                                ##
# #################################################

# -----------------------------------------------------
#
# We can do this all over again, but this time we will
# create two populations that have means that differ by
# a significant amount
#
# Part 2 relies on gnrnd5() and pop_sd() having been
# loaded by the source() calls in Part 1.

# Part 2: generate the two populations ----
#
# We will start by generating two populations
#
gnrnd5(213472499901, 11225010176)
# As before, save the first population in L2 because the
# next gnrnd5() call will replace L1.
L2 <- L1
head(L2, 10)
tail(L2, 10)
#
sigma_2 <- pop_sd(L2)
sigma_2
length(L2)
#
gnrnd5(279402599901, 13413010732)
head(L1, 14)
tail(L1, 14)
sigma_1 <- pop_sd(L1)
sigma_1
length(L1)
#
# Now that we have our two populations
# and now that we know the standard deviation of each
# population, we need to take random samples of the
# two populations
#
# ############################################
# ##  each time we do the following steps   ##
# ##  we will get different samples and as  ##
# ##  such we will get a different          ##
# ##  test of the null hypothesis.          ##
# ############################################

# Part 2: sampling and test -- START of the rerun section ----
n_1 <- 37 # get a sample of size 37 from L1
index_1 <- as.integer(runif(n_1, 1, length(L1) + 1))
index_1
samp_1 <- L1[index_1]
samp_1
#
n_2 <- 49 # get a sample of size 49 from L2
# Draw n_2 (not n_1) index values so that the size of
# samp_2 agrees with the n_2 used in sd_difference and
# in hypoth_2test_known below.
index_2 <- as.integer(runif(n_2, 1, length(L2) + 1))
index_2
samp_2 <- L2[index_2]
samp_2
#
# Then we need to find the mean of each sample
xbar_1 <- mean(samp_1)
xbar_1
xbar_2 <- mean(samp_2)
xbar_2
#
# So our difference of the sample means is
samp_diff <- xbar_1 - xbar_2
samp_diff
#
# The distribution of the difference of the means will
# be normal with standard deviation equal to
#      sqrt( sigma_1^2/n_1 + sigma_2^2/n_2 )
sd_difference <- sqrt(sigma_1^2 / n_1 + sigma_2^2 / n_2)
sd_difference
#
# First the critical value approach:
#
# To run the test at the 0.02 level of significance
# have the 2% we are missing split evenly on both
# tails.  But, since this is a normal distribution
# the two z-values will just be opposites.  We will
# just find the upper value.
#
z <- qnorm(0.02 / 2, lower.tail = FALSE)
z
# then our low critical value will be -z*sd_difference
# and our high critical value will be z*sd_difference
#
# REJECT H0 if samp_diff is less than the low
# critical value or greater than the high value
#
# Second, the attained significance approach
#
# Because this is a two-tailed test we need to
# find how strange it is to get the difference of
# the means to be this strange, the value of samp_diff,
# or stranger.
#
# Because this is a dynamic section of the script
# samp_diff could be positive or negative.  Here
# we will use the fact that the normal distribution
# is symmetric so we can just use the absolute
# value of the difference of the sample means.
#
abs_diff <- abs(samp_diff)
#
# Then the question is how strange would it be to
# get that value or higher from a population that
# is Normal with mean=0 and standard deviation
# equal to sd_difference
#
attained <- pnorm(abs_diff, mean = 0, sd = sd_difference, lower.tail = FALSE)
attained
#
# Then, double that value, because this is a two-tailed
# test, and if the doubled value is less than our
# level of significance we reject H0.
attained * 2
# We could have taken the shortcut and used the
# function that is provided.
#
source("../hypo_2known.R")
# NOTE(review): the last two arguments appear to be the
# hypothesized difference (0) and the level of
# significance (0.02) -- confirm against ../hypo_2known.R
hypoth_2test_known(
  sigma_1, n_1, xbar_1,
  sigma_2, n_2, xbar_2,
  0, 0.02
)
# Part 2: sampling and test -- END of the rerun section ----
#
# #################################################
# ##  Now, highlight and rerun, over and over,   ##
# ##  the Part 2 sampling-and-test section       ##
# ##  (from n_1 <- 37 through the call to        ##
# ##  hypoth_2test_known), to get repeated       ##
# ##  samples and thus, repeated 2% tests of     ##
# ##  the null hypothesis that the means are     ##
# ##  the same.  While you do this be aware      ##
# ##  that the true difference between the       ##
# ##  population means is 15.74752.  How         ##
# ##  often do your tests reject the null        ##
# ##  hypothesis?                                ##
# #################################################